Reproducible Supplement

For manuscript “Development and validation of predictive models of early immune effector cell-associated hematotoxicity (eIPMs)” by Liang et al., 2024

Emily C. Liang, MD (University of Washington and Fred Hutchinson Cancer Center)
2024-07-02

Load the required packages

Show code

Automated ICAHT grading

Import data

Show code
# Clinical data
df <-
  read.xlsx(
    here(
      "dfs",
      "EPIC Data Export",
      "2024.05.29.PatientDemographics for Supplement.xlsx"
    ),
    detectDates = TRUE
  )
df <- clean_names(df)
df <- df %>%
  filter(cart_num == 1)

# CBC data
df_cbc <-
  read.xlsx(here("dfs", "EPIC Data Export", "2024.05.29.hems.xlsx"),
            detectDates = TRUE)
df_cbc <- clean_names(df_cbc)
df_cbc <- df_cbc %>%
  filter(cart_num == 1)

# Fill in missing death dates in df from LTFU datasets
df_death_LTFU <-
  read.xlsx(
    here(
      "dfs",
      "Clinical Datasets",
      "IMTXLTFU-10039_Cause_of_Death_230926.xlsx"
    ),
    detectDates = TRUE
  ) %>%
  clean_names()

df_LTFU_additional <-
  read.xlsx(
    here(
      "dfs",
      "Clinical Datasets",
      "Death and Progression Updates LTFU 240313.xlsx"
    ),
    detectDates = TRUE
  ) %>%
  clean_names()

df_death_LTFU <- df_death_LTFU %>%
  separate(col = upn,
           into = c("initials", "upn"),
           sep = " ") %>%
  select(upn, date_of_death, cause_of_death) %>%
  mutate(date_of_death = as.Date(date_of_death)) %>%
  rename(deathdat = date_of_death)

df <- rows_patch(df,
                 df_death_LTFU %>% select(upn, deathdat),
                 by = "upn",
                 unmatched = "ignore")
df <- rows_patch(
  df,
  df_LTFU_additional %>% select(upn, deathdat),
  by = "upn",
  unmatched = "ignore"
)

# Make the date of last follow-up the latest of: 1) date of death, 2) date of last follow-up from LTFU, 3) date of last follow-up from EMR
df_LTFU <-
  read.xlsx(here("dfs", "Clinical Datasets", "LTFU 240529.xlsx"),
            detectDates = TRUE) %>%
  clean_names()

df_fu_LTFU <- df_LTFU %>%
  select(upn, date_of_latest_progress_clinic_note) %>%
  filter(!is.na(date_of_latest_progress_clinic_note)) %>%
  drop_na() %>%
  group_by(upn) %>%
  dplyr::summarize(date_of_latest_progress_clinic_note = max(date_of_latest_progress_clinic_note))

df <- df %>%
  left_join(., df_fu_LTFU, by = "upn") %>%
  rowwise() %>%
  mutate(datelast =
           max(
             deathdat,
             date_of_latest_progress_clinic_note,
             datelast,
             na.rm = TRUE
           ))

Create data frame with longitudinal daily ANC values (including interpolating missing data)

Show code
# Exclude patients who died prior to day +14
df <- df %>%
  filter(deathdat - cart_date >= 14 | is.na(deathdat))

# Create data frame with longitudinal ANC values
# If last lab draw occurs after 23:00, assign ANC value to the following day
df_ancx <- df_cbc %>%
  filter(upn %in% df$upn) %>%
  select(upn, cart_date, date, time, ancx) %>%
  mutate(time = as.POSIXct(time, format = "%H:%M:%S"),
         new_date  = as.Date(ifelse(
           time >= as.POSIXct("23:00:00", format = "%H:%M:%S"),
           date + days(1),
           date
         ))) %>%
  ungroup()

df_ancx <- df_ancx %>% 
  select(-date, -time) %>% 
  rename(date = new_date)

df_ancx <- df_ancx %>% 
  mutate(time_post_inf = as.numeric(date - cart_date)) %>%
  filter(time_post_inf %in% 0:30) %>%
  distinct(upn, time_post_inf, ancx, .keep_all = TRUE) %>% # Keep unique combinations of upn, time_post_inf, and anc
  group_by(upn, time_post_inf) %>% 
  slice_min(ancx) %>% # For each combination of upn and time_post_inf, keep the lowest ANC value
  ungroup()

# Fill in missing time_post_inf and replicate UPNs by first creating df_extract
df_extract <- df %>%
  distinct(upn, cart_date, datelast)

# Create replicate_ids() to replicate UPNs according to duration of follow-up
replicate_ids <- function(upn, cart_date, datelast) {
  if (is.na(datelast) || (datelast - cart_date) >= 30) {
    return(replicate(31, upn))
  } else {
    return(replicate(datelast - cart_date + 1, upn)) # Add 1 to account for day +0
  }
}

# Create df_id with replicated UPNs
df_id <- df_extract %>%
  dplyr::rowwise() %>%
  do(data.frame(upn = replicate_ids(.$upn, .$cart_date, .$datelast))) %>% 
  group_by(upn) %>% 
  mutate(time_post_inf = row_number() - 1) %>% 
  ungroup()

# Merge df_id with df
df_ancx <- df_ancx %>%
  group_by(upn) %>%
  merge(.,
        df_id,
        by = c("upn", "time_post_inf"),
        all.y = TRUE) %>% 
  ungroup()

# Fill in cart_date and date columns
df_ancx <- df_ancx %>%
  group_by(upn) %>%
  mutate(cart_date = as.Date(ifelse(
    is.na(cart_date), max(cart_date, na.rm = TRUE), cart_date
  )),
  date = as.Date(cart_date + time_post_inf)) %>% 
  ungroup()

# Linearly interpolate missing ANC values for up to 7 consecutive NAs
df_ancx <- df_ancx %>%
  arrange(upn, time_post_inf) %>%
  group_by(upn) %>% 
  mutate(ancx = na.approx(ancx, maxgap = 7, rule = 2)) %>% 
  ungroup()

# Round ANC values to nearest integer divisible by 10 (like real ANC values)
df_ancx <- df_ancx %>% 
  mutate(ancx = round(ancx/10) * 10)

# Exclude those with missing ANCs after interpolation
df_ancx <- df_ancx %>%
  group_by(upn) %>%
  mutate(sumNA = sum(is.na(ancx))) %>%
  filter(sumNA == 0) %>% 
  ungroup()

# Add UWIDs
df_ancx <- df_ancx %>%
  inner_join(., df %>% select(upn, uwid), by = "upn") %>%
  select(upn, uwid, cart_date, date, time_post_inf, ancx)

# Log10-transform ANC and select variables for clustering
df_anc <- df_ancx %>%
  mutate(anc = ifelse(ancx == 0, 0.01, ancx),
         anc = log10(anc)) %>%
  select(upn, time_post_inf, anc)

var_label(df_anc$time_post_inf) <- "Days after CAR T-cell infusion"
var_label(df_anc$anc) <- "ANC"

Automatically assign eICAHT grades using {heatwaveR}

Show code
# For eICAHT threshold of ANC ≤ 500 cells/μL
df_exceedances_below_501 <-
  # ddply splits a data frame, applies a function, and combines results into a data frame
  plyr::ddply(.data = df_ancx, .variables = c("upn", "uwid"), .fun = function(data) {
    heatwaveR::exceedance(
      data,
      x = date,
      y = ancx,
      threshold = 501,
      below = TRUE, # Whether to detect events below the threshold,
      minDuration = 1, # Minimum duration for acceptance of detected events
      joinAcrossGaps = TRUE, # Whether to join events which occur before/after gap
      maxGap = 2 # Maximum length of gap allowed for joining of events
    )$exceedance
  })

# Create data frame containing maximum duration of events for each UPN
df_anc_500 <- df_exceedances_below_501 %>%
  select(upn, uwid, exceedance_no, duration) %>%
  group_by(upn, uwid) %>%
  dplyr::summarize(duration_below_501_max = max(duration)) %>%
  mutate(duration_below_501_max = ifelse( # Replace NAs with 0 (never below threshold)
    is.na(duration_below_501_max),
    0,
    duration_below_501_max
  ))

# For eICAHT threshold of ANC ≤ 100 cells/μL
df_exceedances_below_101 <-
  plyr::ddply(.data = df_ancx, .variables = c("upn"), .fun = function(data) {
    heatwaveR::exceedance(
      data,
      x = date,
      y = ancx,
      threshold = 101,
      below = TRUE,
      minDuration = 1,
      joinAcrossGaps = TRUE,
      maxGap = 2,
      maxPadLength = FALSE
    )$exceedance
  })

df_anc_100 <- df_exceedances_below_101 %>%
  select(upn, exceedance_no, duration) %>%
  group_by(upn) %>%
  dplyr::summarize(duration_below_101_max = max(duration)) %>%
  mutate(duration_below_101_max = ifelse(
    is.na(duration_below_101_max),
    0,
    duration_below_101_max
  ))

# Create data frame with exceedances below each threshold for each UPN
df_icaht <- df_anc_500 %>% 
  left_join(., df_anc_100, by = "upn") %>% 
  ungroup()

# Create column with ICAHT grades
df_icaht <- df_icaht %>%
  mutate(
    icaht_grade = case_when(
      duration_below_501_max == 0 & duration_below_101_max == 0 ~ "Grade 0",
      duration_below_501_max %in% 1:6 & duration_below_101_max < 7 ~ "Grade 1",
      duration_below_501_max %in% 7:13 & duration_below_101_max < 7 ~ "Grade 2",
      (duration_below_501_max %in% 14:30 & duration_below_101_max < 7) |
        (duration_below_501_max < 31 & duration_below_101_max %in% 7:13) ~ "Grade 3",
      (duration_below_501_max >= 31 & duration_below_101_max < 14) | duration_below_101_max >= 14 ~ "Grade 4"
    )
  )

Additionally define grade 4 ICAHT

Show code
df_exceedances_below_501 <- df_exceedances_below_501 %>%
  filter(!is.na(exceedance_no)) %>%  # Filter out patients who never had neutropenia
  select(upn, 
         exceedance_below_501_no = exceedance_no, 
         exceedance_below_501_date_start = date_start,
         exceedance_below_501_date_end = date_end) %>% 
  mutate(exceedance_below_501_date_start = as.Date(exceedance_below_501_date_start), # Convert dates to class "Date"
         exceedance_below_501_date_end = as.Date(exceedance_below_501_date_end))

# Add date of last available ANC value
df_last_time_post_inf <- df_ancx %>%
  arrange(upn, time_post_inf) %>%
  group_by(upn) %>% 
  slice_tail(n = 1) %>% # Select last row
  ungroup() %>% 
  select(upn, 
         cart_date,
         last_lab_date = date)

df_exceedances_below_501 <- df_exceedances_below_501 %>% 
  left_join(., df_last_time_post_inf, by = "upn")

# Classify patients who had exceedances below ANC ≤ 500 cells/μL 
# starting between days 0-3 and ending on "last_lab_date" as grade 4 early ICAHT
df_exceedances_below_501 <- df_exceedances_below_501 %>%
  mutate(
    icaht_grade_4 = ifelse(
      exceedance_below_501_date_start - cart_date <= 3 &
        exceedance_below_501_date_end == last_lab_date,
      "Yes",
      "No"
    )
  )

df_icaht <- df_icaht %>%
  left_join(.,
            df_exceedances_below_501 %>% select(upn, icaht_grade_4),
            by = "upn")

df_icaht <- df_icaht %>%
  mutate(
    icaht_grade = dplyr::if_else(
      icaht_grade_4 == "Yes",
      "Grade 4",
      icaht_grade,
      missing = icaht_grade
    )
  ) %>%
  distinct(upn,
           icaht_grade,
           .keep_all = TRUE)

Update ICAHT grades for patients who were included Liang, Rejeski, et al., Bone Marrow Transplantation 2024 (n = 605)

Show code
# Data frame contains automated grades assigned by {heatwaveR} approach, MSKCC manual code, and manual grades for discrepancies
df_check <-
  read.xlsx(here("dfs",
                 "Manual vs. Automated Early ICAHT Grades.xlsx")) %>% 
  clean_names()

# Filter to cases incorrectly graded by {heatwaveR} approach
df_check <- df_check %>% 
  filter(center == "FHCC",
         disagreement_manual_heatwave_r == 1) %>% 
  select(upn = patient_id,
         manual_icaht_grade) %>% 
  mutate(manual_icaht_grade = paste0("Grade ", manual_icaht_grade))

# Replace grades in df_icaht with correct grades
df_icaht <- df_icaht %>% 
  left_join(., df_check, by = "upn")

df_icaht <- df_icaht %>%
  mutate(icaht_grade = ifelse(!is.na(manual_icaht_grade), manual_icaht_grade, icaht_grade)) %>% 
  select(-icaht_grade_4, -manual_icaht_grade)

# Add ICAHT grades to df
df <- df %>% 
  filter(upn %in% df_icaht$upn) %>% 
  left_join(., df_icaht %>% select(upn, icaht_grade), by = "upn")

df <- df %>% 
  mutate(icaht_grade_num = case_when(icaht_grade == "Grade 0" ~ 0,
                                     icaht_grade == "Grade 1" ~ 1,
                                     icaht_grade == "Grade 2" ~ 2,
                                     icaht_grade == "Grade 3" ~ 3,
                                     icaht_grade == "Grade 4" ~ 4))

Import/wrangle additional data

Clinical data

Show code
df <- df %>%
  mutate(
    disease_cat = 
      factor(disease_cat,
      levels = c("Indolent NHL", "Aggressive NHL", "ALL", "MM/PCL")),
    product_cat = as.factor(product_cat),
    car_target = case_when(
      product_cat == "Commercial CD19 CAR T-cell product with CD28 costimulatory domain (axi-cel, brexu-cel)" |
        product_cat == "Commercial CD19 CAR T-cell product with 4-1BB costimulatory domain (liso-cel, tisa-cel)" |
        product_infused == "JCAR014" |
        product_infused == "JCAR021" ~ "CD19",
      product_infused == "Investigational CD20 product" ~ "CD20",
      .default = "BCMA"
    ),
    car_target = factor(car_target,
      levels = c("BCMA", "CD19", "CD20")),
    racenih_cat = ifelse(racenih == "White", "White", "Non-White"),
    racenih_cat = fct_relevel(as.factor(racenih_cat), "White"),
    ethnih_cat = ifelse(
      ethnih == "Hispanic or Latino",
      "Hispanic or Latino",
      "Not Hispanic/Latino or Unknown"
    ),
    ethnih = fct_relevel(as.factor(ethnih_cat), "Not Hispanic/Latino or Unknown"),
    icaht_grade = fct_relevel(icaht_grade, "Grade 0"),
    icaht_grade_3_4 = ifelse(icaht_grade_num %in% 3:4, 1, 0),
    prior_hct = ifelse(!is.na(txdate1) &
                         txdate1 < cart_date, "Yes", "No")
  )

df <- df %>%
  select(
    upn,
    cart_num,
    cart_date,
    cart_age,
    gender_desc,
    racenih,
    racenih_cat,
    ethnih,
    ethnih_cat,
    disease_cat,
    prior_hct,
    product_cat,
    product_infused,
    car_target,
    costim_domain,
    datelast,
    deathdat,
    icaht_grade,
    icaht_grade_num,
    icaht_grade_3_4
  )

CRS/ICANS data

Show code
df_tox <-
  read.xlsx(here("dfs", "CAR Toxicity Data 240529.xlsx"),
            detectDates = TRUE) %>%
  clean_names()

df <- df %>% 
  left_join(., df_tox %>% select(upn, crs_grade, icans_grade), by = "upn")

var_label(df$cart_age) <- "Age"
var_label(df$gender_desc) <- "Sex"
var_label(df$racenih) <- "Race"
var_label(df$racenih_cat) <- "Race"
var_label(df$ethnih) <- "Ethnicity"
var_label(df$ethnih_cat) <- "Ethnicity"
var_label(df$disease_cat) <- "Disease"
var_label(df$prior_hct) <- "Prior HCT"
var_label(df$product_cat) <- "CAR T-cell product category"
var_label(df$product_infused) <- "CAR T-cell product"
var_label(df$car_target) <- "CAR target"
var_label(df$costim_domain) <- "Costimulatory domain"
var_label(df$crs_grade) <- "CRS grade"
var_label(df$icans_grade) <- "ICANS grade"
var_label(df$icaht_grade) <- "ICAHT grade"

Lymphodepletion regimen data

Show code
# Regimen
df_LD <-
  read.xlsx(
    here(
      "dfs",
      "EPIC Data Export",
      "2024.05.29.lymphodepletionRegimen.xlsx"
    ),
    detectDates = TRUE
  ) %>%
  clean_names()
df_LD <- df_LD %>% filter(cart_num == 1)

df_LD <- df_LD %>%
  mutate(LD_regimen = case_when(
    grepl("cyclophosphamide", dep_regimen, ignore.case = TRUE) &
      grepl("fludarabine", dep_regimen, ignore.case = TRUE) ~ "Cy/Flu",
    .default = "Other"
  ))

# Dose
df_LD_dose <-
  read.xlsx(here("dfs",
                 "EPIC Data Export",
                 "2024.05.29.treatmentNotes.xlsx"),
            detectDates = TRUE) %>%
  clean_names()
df_LD_dose <- df_LD_dose %>% filter(cart_num == 1)

df_LD_dose <- df_LD_dose %>%
  group_by(upn) %>%
  mutate(LD_dose = paste(condition_notes, collapse = " ")) %>% 
  slice_head(n = 1)

# Join data frames
df_LD <- df_LD %>% 
  left_join(df_LD_dose %>% select(upn, LD_dose), by = "upn")

# Categorize lymphodepletion regimens
# Define high-intensity Cy/Flu regimens
high_cy_flu <-
  c(
    "Cyclophosphamide 500 mg/msq/day",
    "Cyclophosphamide 60 mg/kg x 1 dose",
    "Cyclophosphamide 60 mg/kg IV x 1 dose",
    "Cyclophosphamide 30 mg/kg x 1 dose"
  )

df_LD <- df_LD %>%
  mutate(
    LD_regimen_cat = case_when(
      LD_regimen == "Cy/Flu" &
        grepl(paste(high_cy_flu, collapse = "|"), LD_dose, ignore.case = TRUE) ~ "High-intensity Cy/Flu",
      LD_regimen == "Cy/Flu" ~ "Low-intensity Cy/Flu",
      .default = "Other"
    )
  )

# Add LD regimen data to df
df <- df %>% 
  left_join(., df_LD %>% select(upn, LD_regimen_cat), by = "upn")

df <- df %>%
  mutate(
    LD_regimen_cat = fct_relevel(
      LD_regimen_cat,
      c("Low-intensity Cy/Flu", "High-intensity Cy/Flu", "Other")
    ),
    LD_regimen_low_CyFlu_vs_other = ifelse(
      LD_regimen_cat == "Low-intensity Cy/Flu",
      "Low-intensity Cy/Flu",
      "Other"
    ),
    LD_regimen_low_CyFlu_vs_other = factor(
      LD_regimen_low_CyFlu_vs_other,
      levels = c("Low-intensity Cy/Flu", "Other")
    )
  )

var_label(df$LD_regimen_cat) <- "Lymphodepletion regimen"
var_label(df$LD_regimen_low_CyFlu_vs_other) <- "Lymphodepletion regimen (low-dose Cy/Flu vs. other)"

Laboratory data

Show code
# If last CBC draw occurs after 23:00, assign value to the following day
df_cbc <- df_cbc %>%
  mutate(time = as.POSIXct(time, format = "%H:%M:%S"),
         date  = as.Date(ifelse(
           time >= as.POSIXct("23:00:00", format = "%H:%M:%S"),
           date + 1,
           date
         )),
         day = as.numeric(date - cart_date)) %>%
  ungroup()

# Other labs
df_labs <-
  read.xlsx(here("dfs", "EPIC Data Export", "2024.05.29.labs.xlsx"),
            detectDates = TRUE) %>%
  clean_names()

df_labs <- df_labs %>%
  filter(cart_num == 1)

df_labs <- df_labs %>%
  mutate(time = as.POSIXct(time, format = "%H:%M:%S"),
         new_date  = as.Date(ifelse(
           time >= as.POSIXct("23:00:00", format = "%H:%M:%S"),
           date + 1,
           date
         )),
         new_day = as.numeric(new_date - cart_date)) %>%
  ungroup()

# Pre-lymphodepletion labs
df_pre_ld <-
  read.xlsx(
    here(
      "dfs",
      "EPIC Data Export",
      "2024.05.29.prelymphodepletionLabsHems.xlsx"
    ),
    detectDates = TRUE
  ) %>%
  clean_names()

df_pre_ld <- df_pre_ld %>%
  filter(cart_num == 1)

## Add pre-LD ANC, ALC, Hb, platelet count, and LDH to df
df <- df_pre_ld %>%
  mutate(
    anc_pre_ld = ancx_pre_d / 1000,
    lym_pre_ld = lymx_pre_d / 1000,
    hb_pre_ld = hgb_amt_pre_d,
    plt_pre_ld = tot_platelet_cnt_pre_d / 1000
  ) %>%
  select(upn, anc_pre_ld, lym_pre_ld, hb_pre_ld, plt_pre_ld, ldh_pre_ld = ldh_u_l_pre_d) %>%
  left_join(df, ., by = "upn")

var_label(df$anc_pre_ld) <- "Pre-LD ANC"
var_label(df$hb_pre_ld) <- "Pre-LD Hb"
var_label(df$plt_pre_ld) <- "Pre-LD platelet count"

# Add day +0 LDH to df
# If there are multiple day +0 values, use the earliest one (corresponding to pre-infusion value)
df <- df_labs %>%
  filter(day == 0, !is.na(ldh_u_l)) %>%
  select(upn, day, time, ldh_u_l) %>%
  group_by(upn) %>%
  filter(time == min(time)) %>%
  select(upn, ldh_u_l) %>%
  rename(ldh_day_0 = ldh_u_l) %>%
  left_join(df, ., by = "upn")

# Add pre-LD, day +0, day +3, day +5, day +7, and peak CRP to df (where peak is between days +0 and +30)
# For day +3, use values between days +2 through +4
# If there are multiple day +0 values, use the earliest one (corresponding to pre-infusion value)
# For the other timepoints: if there are multiple values, use the peak/nadir value (depending on the laboratory test of interest)
df <- df_pre_ld %>%
  select(upn, crp_mg_l_pre_d) %>% 
  rename(crp_pre_ld = crp_mg_l_pre_d) %>% 
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 0, !is.na(crp_mg_l)) %>%
  select(upn, day, time, crp_mg_l) %>%
  group_by(upn) %>%
  filter(time == min(time)) %>%
  select(upn, crp_mg_l) %>%
  rename(crp_day_0 = crp_mg_l) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 2:4, !is.na(crp_mg_l)) %>%
  select(upn, day, crp_mg_l) %>%
  group_by(upn) %>%
  dplyr::summarize(crp_day_3 = max(crp_mg_l)) %>%
  select(upn, crp_day_3) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 5, !is.na(crp_mg_l)) %>%
  select(upn, day, crp_mg_l) %>%
  group_by(upn) %>%
  dplyr::summarize(crp_day_5 = max(crp_mg_l)) %>%
  select(upn, crp_day_5) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 7, !is.na(crp_mg_l)) %>%
  select(upn, day, crp_mg_l) %>%
  group_by(upn) %>%
  dplyr::summarize(crp_day_7 = max(crp_mg_l)) %>%
  select(upn, crp_day_7) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 0:30, !is.na(crp_mg_l)) %>%
  select(upn,
         crp_mg_l) %>%
  group_by(upn) %>%
  dplyr::summarize(crp_max = max(crp_mg_l)) %>%
  left_join(df, ., by = "upn")

# Add pre-LD, day +0, day +3, day +5, day +7, and peak ferritin to df
df <- df_pre_ld %>%
  select(upn, ferritin_ng_ml_pre_d) %>% 
  rename(ferritin_pre_ld = ferritin_ng_ml_pre_d) %>% 
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 0, !is.na(ferritin_ng_ml)) %>%
  select(upn, day, time, ferritin_ng_ml) %>%
  group_by(upn) %>%
  filter(time == min(time)) %>%
  select(upn, ferritin_ng_ml) %>%
  rename(ferritin_day_0 = ferritin_ng_ml) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 2:4, !is.na(ferritin_ng_ml)) %>%
  select(upn, day, ferritin_ng_ml) %>%
  group_by(upn) %>%
  dplyr::summarize(ferritin_day_3 = max(ferritin_ng_ml)) %>%
  select(upn, ferritin_day_3) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 5, !is.na(ferritin_ng_ml)) %>%
  select(upn, day, ferritin_ng_ml) %>%
  group_by(upn) %>%
  dplyr::summarize(ferritin_day_5 = max(ferritin_ng_ml)) %>%
  select(upn, ferritin_day_5) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 7, !is.na(ferritin_ng_ml)) %>%
  select(upn, day, ferritin_ng_ml) %>%
  group_by(upn) %>%
  dplyr::summarize(ferritin_day_7 = max(ferritin_ng_ml)) %>%
  select(upn, ferritin_day_7) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 0:30, !is.na(ferritin_ng_ml)) %>%
  select(upn,
         ferritin_ng_ml) %>%
  group_by(upn) %>%
  dplyr::summarize(ferritin_max = max(ferritin_ng_ml)) %>%
  left_join(df, ., by = "upn")

# Add pre-LD, day +0, day +3, day +5, day +7, and peak IL-6 to df
df <- df_pre_ld %>%
  select(upn, il6_pg_ml_pre_d) %>% 
  rename(il6_pre_ld = il6_pg_ml_pre_d) %>% 
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 0, !is.na(il6_pg_ml)) %>%
  select(upn, day, time, il6_pg_ml) %>%
  group_by(upn) %>%
  filter(time == min(time)) %>%
  select(upn, il6_pg_ml) %>%
  rename(il6_day_0 = il6_pg_ml) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 2:4, !is.na(il6_pg_ml)) %>%
  select(upn, day, il6_pg_ml) %>%
  group_by(upn) %>%
  dplyr::summarize(il6_day_3 = max(il6_pg_ml)) %>%
  select(upn, il6_day_3) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 5, !is.na(il6_pg_ml)) %>%
  select(upn, day, il6_pg_ml) %>%
  group_by(upn) %>%
  dplyr::summarize(il6_day_5 = max(il6_pg_ml)) %>%
  select(upn, il6_day_5) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 7, !is.na(il6_pg_ml)) %>%
  select(upn, day, il6_pg_ml) %>%
  group_by(upn) %>%
  dplyr::summarize(il6_day_7 = max(il6_pg_ml)) %>%
  select(upn, il6_day_7) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 0:30, !is.na(il6_pg_ml)) %>%
  select(upn, il6_pg_ml) %>%
  group_by(upn) %>%
  dplyr::summarize(il6_max = max(il6_pg_ml)) %>%
  left_join(df, ., by = "upn")

# Add pre-LD, day +0, day +3, day +5, day + 7, and minimum fibrinogen to df
df <- df_pre_ld %>%
  select(upn, fibrinogen_mg_dl_pre_d) %>% 
  rename(fibrinogen_pre_ld = fibrinogen_mg_dl_pre_d) %>% 
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 0, !is.na(fibrinogen_mg_dl)) %>%
  select(upn, day, time, fibrinogen_mg_dl) %>%
  group_by(upn) %>%
  filter(time == min(time)) %>%
  select(upn, fibrinogen_mg_dl) %>%
  rename(fibrinogen_day_0 = fibrinogen_mg_dl) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 2:4, !is.na(fibrinogen_mg_dl)) %>%
  select(upn, day, fibrinogen_mg_dl) %>%
  group_by(upn) %>%
  dplyr::summarize(fibrinogen_day_3 = max(fibrinogen_mg_dl)) %>%
  select(upn, fibrinogen_day_3) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 5, !is.na(fibrinogen_mg_dl)) %>%
  select(upn, day, fibrinogen_mg_dl) %>%
  group_by(upn) %>%
  dplyr::summarize(fibrinogen_day_5 = max(fibrinogen_mg_dl)) %>%
  select(upn, fibrinogen_day_5) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 7, !is.na(fibrinogen_mg_dl)) %>%
  select(upn, day, fibrinogen_mg_dl) %>%
  group_by(upn) %>%
  dplyr::summarize(fibrinogen_day_7 = max(fibrinogen_mg_dl)) %>%
  select(upn, fibrinogen_day_7) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 0:30, !is.na(fibrinogen_mg_dl)) %>%
  select(upn,
         fibrinogen_mg_dl) %>%
  group_by(upn) %>%
  dplyr::summarize(fibrinogen_min = min(fibrinogen_mg_dl)) %>%
  left_join(df, ., by = "upn")

# Add pre-LD, day +0, day +3, day +5, day + 7, and peak D-dimer to df
df <- df_pre_ld %>%
  select(upn, d_dimer_pre_d) %>% 
  rename(d_dimer_pre_ld = d_dimer_pre_d) %>% 
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 0, !is.na(d_dimer)) %>%
  select(upn, day, time, d_dimer) %>%
  group_by(upn) %>%
  filter(time == min(time)) %>%
  select(upn, d_dimer) %>%
  rename(d_dimer_day_0 = d_dimer) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 2:4, !is.na(d_dimer)) %>%
  select(upn, day, d_dimer) %>%
  group_by(upn) %>%
  dplyr::summarize(d_dimer_day_3 = max(d_dimer)) %>%
  select(upn, d_dimer_day_3) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 5, !is.na(d_dimer)) %>%
  select(upn, day, d_dimer) %>%
  group_by(upn) %>%
  dplyr::summarize(d_dimer_day_5 = max(d_dimer)) %>%
  select(upn, d_dimer_day_5) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 7, !is.na(d_dimer)) %>%
  select(upn, day, d_dimer) %>%
  group_by(upn) %>%
  dplyr::summarize(d_dimer_day_7 = max(d_dimer)) %>%
  select(upn, d_dimer_day_7) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 0:30, !is.na(d_dimer)) %>%
  select(upn,
         d_dimer) %>%
  group_by(upn) %>%
  dplyr::summarize(d_dimer_max = max(d_dimer)) %>%
  left_join(df, ., by = "upn")

# Add pre-LD, day +0, day +3, day +5, day + 7, and peak PTT to df
df <- df_pre_ld %>%
  select(upn, ptt_patient_pre_d) %>% 
  rename(ptt_pre_ld = ptt_patient_pre_d) %>% 
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 0, !is.na(ptt_patient)) %>%
  select(upn, day, time, ptt_patient) %>%
  group_by(upn) %>%
  filter(time == min(time)) %>%
  select(upn, ptt_patient) %>%
  rename(ptt_day_0 = ptt_patient) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 2:4, !is.na(ptt_patient)) %>%
  select(upn, day, ptt_patient) %>%
  group_by(upn) %>%
  dplyr::summarize(ptt_day_3 = max(ptt_patient)) %>%
  select(upn, ptt_day_3) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 5, !is.na(ptt_patient)) %>%
  select(upn, day, ptt_patient) %>%
  group_by(upn) %>%
  dplyr::summarize(ptt_day_5 = max(ptt_patient)) %>%
  select(upn, ptt_day_5) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day == 7, !is.na(ptt_patient)) %>%
  select(upn, day, ptt_patient) %>%
  group_by(upn) %>%
  dplyr::summarize(ptt_day_7 = max(ptt_patient)) %>%
  select(upn, ptt_day_7) %>%
  left_join(df, ., by = "upn")

df <- df_labs %>%
  filter(day %in% 0:30, !is.na(ptt_patient)) %>%
  select(upn,
         ptt_patient) %>%
  group_by(upn) %>%
  dplyr::summarize(ptt_max = max(ptt_patient)) %>%
  left_join(df, ., by = "upn")

# Log10-transform laboratory data
df <- df %>%
  mutate(across(anc_pre_ld:ptt_max, ~ifelse(. == 0, 0.01, .), .names = "{.col}_log10")) %>%
  mutate(across(anc_pre_ld_log10:ptt_max_log10, log10))

var_label(df$anc_pre_ld_log10) <- "Pre-LD ANC"
var_label(df$lym_pre_ld_log10) <- "Pre-LD ALC"
var_label(df$hb_pre_ld) <- "Pre-LD Hb"
var_label(df$plt_pre_ld_log10) <- "Pre-LD platelet count"
var_label(df$ldh_pre_ld_log10) <- "Pre-LD LDH"
var_label(df$crp_pre_ld_log10) <- "Pre-LD CRP"
var_label(df$crp_day_0_log10) <- "Day +0 CRP"
var_label(df$crp_day_3_log10) <- "Day +3 CRP"
var_label(df$crp_day_5_log10) <- "Day +5 CRP"
var_label(df$crp_day_7_log10) <- "Day +7 CRP"
var_label(df$crp_max_log10) <- "Peak CRP"
var_label(df$ferritin_pre_ld_log10) <- "Pre-LD ferritin"
var_label(df$ferritin_day_0_log10) <- "Day +0 ferritin"
var_label(df$ferritin_day_3_log10) <- "Day +3 ferritin"
var_label(df$ferritin_day_5_log10) <- "Day +5 ferritin"
var_label(df$ferritin_day_7_log10) <- "Day +7 ferritin"
var_label(df$ferritin_max_log10) <- "Peak ferritin"
var_label(df$il6_pre_ld_log10) <- "Pre-LD IL-6"
var_label(df$il6_day_0_log10) <- "Day +0 IL-6"
var_label(df$il6_day_3_log10) <- "Day +3 IL-6"
var_label(df$il6_day_5_log10) <- "Day +5 IL-6"
var_label(df$il6_day_7_log10) <- "Day +7 IL-6"
var_label(df$il6_max_log10) <- "Peak IL-6"
var_label(df$fibrinogen_pre_ld_log10) <- "Pre-LD fibrinogen"
var_label(df$fibrinogen_day_0_log10) <- "Day +0 fibrinogen"
var_label(df$fibrinogen_day_3_log10) <- "Day +3 fibrinogen"
var_label(df$fibrinogen_day_5_log10) <- "Day +5 fibrinogen"
var_label(df$fibrinogen_day_7_log10) <- "Day +7 fibrinogen"
var_label(df$fibrinogen_min_log10) <- "Nadir fibrinogen"
var_label(df$d_dimer_pre_ld_log10) <- "Pre-LD D-dimer"
var_label(df$d_dimer_day_0_log10) <- "Day +0 D-dimer"
var_label(df$d_dimer_day_3_log10) <- "Day +3 D-dimer"
var_label(df$d_dimer_day_5_log10) <- "Day +5 D-dimer"
var_label(df$d_dimer_day_7_log10) <- "Day +7 D-dimer"
var_label(df$d_dimer_max_log10) <- "Peak D-dimer"
var_label(df$ptt_pre_ld_log10) <- "Pre-LD PTT"
var_label(df$ptt_day_0_log10) <- "Day +0 PTT"
var_label(df$ptt_day_3_log10) <- "Day +3 PTT"
var_label(df$ptt_day_5_log10) <- "Day +5 PTT"
var_label(df$ptt_day_7_log10) <- "Day +7 PTT"
var_label(df$ptt_max_log10) <- "Peak PTT"

Table 1: Patient, disease, and treatment characteristics

Show code
df %>%
  select(
    cart_age,
    gender_desc,
    racenih,
    ethnih,
    prior_hct,
    disease_cat,
    anc_pre_ld,
    hb_pre_ld, 
    plt_pre_ld,
    LD_regimen_cat,
    product_infused,
    product_cat
  ) %>%
  tbl_summary(
    missing_text = "Missing",
    type = list(all_continuous() ~ "continuous2"),
    statistic = list(
      all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"),
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_categorical() ~ 0,
    sort = list(everything() ~ "frequency")
  ) %>%
  bold_labels()
Characteristic N = 6911
Age
    Median (IQR) 61 (51, 68)
    Range 19 - 84
Sex
    Male 437 (63%)
    Female 254 (37%)
Race
    White 583 (84%)
    Asian 58 (8%)
    Black or African American 18 (3%)
    American Indian or Alaska Native 13 (2%)
    Multiple 8 (1%)
    Unknown 7 (1%)
    Native Hawaiian or other Pacific Islander 4 (1%)
Ethnicity
    Not Hispanic/Latino or Unknown 639 (92%)
    Hispanic or Latino 52 (8%)
Prior HCT 234 (34%)
Disease
    Aggressive NHL 318 (46%)
    Indolent NHL 154 (22%)
    MM/PCL 120 (17%)
    ALL 99 (14%)
Pre-LD ANC
    Median (IQR) 2.84 (1.79, 4.29)
    Range 0.00 - 54.03
    Missing 1
Pre-LD Hb
    Median (IQR) 10.70 (9.50, 12.35)
    Range 7.00 - 17.10
Pre-LD platelet count
    Median (IQR) 142 (89, 202)
    Range 6 - 790
Lymphodepletion regimen
    Low-intensity Cy/Flu 403 (58%)
    High-intensity Cy/Flu 253 (37%)
    Other 35 (5%)
CAR T-cell product
    JCAR014 193 (28%)
    YESCARTA 140 (20%)
    BREYANZI 87 (13%)
    CARVYKTI 52 (8%)
    TECARTUS 47 (7%)
    Investigational CD20 product 45 (7%)
    JCAR021 44 (6%)
    Investigational BCMA product 38 (5%)
    ABECMA 30 (4%)
    KYMRIAH 15 (2%)
CAR T-cell product category
    Investigational CAR T-cell product 320 (46%)
    Commercial CD19 CAR T-cell product with CD28 costimulatory domain (axi-cel, brexu-cel) 187 (27%)
    Commercial CD19 CAR T-cell product with 4-1BB costimulatory domain (liso-cel, tisa-cel) 102 (15%)
    Commercial BCMA CAR T-cell product (cilta-cel, ide-cel) 82 (12%)
1 n (%)

Incidences of early ICAHT by grade

Show code
df %>%
  select(icaht_grade) %>%
  tbl_summary(
    statistic = list(all_categorical() ~ "{n} ({p}%)"),
    digits = all_categorical() ~ 0,
    sort = list(everything() ~ "frequency")
  ) %>%
  bold_labels()
Characteristic N = 6911
ICAHT grade
    Grade 1 366 (53%)
    Grade 0 112 (16%)
    Grade 2 111 (16%)
    Grade 3 67 (10%)
    Grade 4 35 (5%)
1 n (%)

Incidences of CRS and ICANS

Show code
df %>%
  select(crs_grade, icans_grade) %>%
  tbl_summary(statistic = list(all_categorical() ~ "{n} ({p}%)"))
Characteristic N = 6911
CRS grade
    0 175 (25%)
    1 224 (32%)
    2 257 (37%)
    3 23 (3.3%)
    4 10 (1.4%)
    5 2 (0.3%)
ICANS grade
    0 411 (59%)
    1 72 (10%)
    2 84 (12%)
    3 103 (15%)
    4 19 (2.7%)
    5 2 (0.3%)
1 n (%)

Median follow-up and overall survival

Show code
df <- df %>%
  mutate(
    OS_code = ifelse(is.na(deathdat), 0, 1),
    OS_months = as.duration(cart_date %--% datelast) / dmonths(1)
  )

# Create data frame with OS landmarked at day +30
df_landmark_OS <- df %>% 
  filter(OS_months > 30/30.4167) %>% 
  mutate(OS_months_landmark = OS_months - 30/30.4167)

quantile(prodlim(Hist(OS_months, OS_code) ~ 1, data = df, reverse = TRUE))
Quantiles of the potential follow up time distribution based on the Kaplan-Meier method 
applied to the censored times reversing the roles of event status and censored.

Table of quantiles and corresponding confidence limits:
       q quantile lower upper
   <num>    <num> <num> <num>
1:  0.00       NA    NA    NA
2:  0.25    63.28 57.46 72.18
3:  0.50    35.12 28.85 38.21
4:  0.75    12.25 11.33 13.86
5:  1.00     0.92  0.92  0.99

Median time (IQR):35.12 (12.25;63.28)
Show code
surv_OS <- survfit2(Surv(OS_months, OS_code) ~ 1, data = df)
surv_OS
Call: survfit(formula = Surv(OS_months, OS_code) ~ 1, data = df)

       n events median 0.95LCL 0.95UCL
[1,] 691    320   29.4    24.2    38.6
Show code
# 3-year estimate
summary(surv_OS, times = 36)
Call: survfit(formula = Surv(OS_months, OS_code) ~ 1, data = df)

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
   36    154     285    0.465   0.023        0.422        0.512

Univariate logistic regression models of grade 3-4 ICAHT

Using log10-transformed laboratory values, except pre-LD hemoglobin due to lack of convergence with log10(pre-LD hemoglobin)

Show code
tbl_uvregression <- df %>%
  select(
    icaht_grade_3_4,
    cart_age,
    gender_desc,
    racenih_cat,
    ethnih_cat,
    prior_hct,
    disease_cat,
    LD_regimen_cat,
    LD_regimen_low_CyFlu_vs_other,
    car_target,
    costim_domain,
    crs_grade, 
    icans_grade,
    anc_pre_ld_log10,
    hb_pre_ld,
    plt_pre_ld_log10,
    ldh_pre_ld_log10,
    crp_pre_ld_log10,
    crp_day_0_log10,
    crp_day_3_log10,
    crp_day_5_log10,
    crp_day_7_log10,
    crp_max_log10,
    ferritin_pre_ld_log10,
    ferritin_day_0_log10,
    ferritin_day_3_log10,
    ferritin_day_5_log10,
    ferritin_day_7_log10,
    ferritin_max_log10,
    il6_pre_ld_log10,
    il6_day_0_log10,
    il6_day_3_log10,
    il6_day_5_log10,
    il6_day_7_log10,
    il6_max_log10,
    fibrinogen_pre_ld_log10,
    fibrinogen_day_0_log10,
    fibrinogen_day_3_log10,
    fibrinogen_day_5_log10,
    fibrinogen_day_7_log10,
    fibrinogen_min_log10,
    d_dimer_pre_ld_log10,
    d_dimer_day_0_log10,
    d_dimer_day_3_log10,
    d_dimer_day_5_log10,
    d_dimer_day_7_log10,
    d_dimer_max_log10,
    ptt_pre_ld_log10,
    ptt_day_0_log10,
    ptt_day_3_log10,
    ptt_day_5_log10,
    ptt_day_7_log10,
    ptt_max_log10
  ) %>%
  tbl_uvregression(
    method = glm,
    y = icaht_grade_3_4,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    pvalue_fun = ~ style_pvalue(.x, digits = 2)
  ) %>%
  add_nevent() %>%
  bold_p() %>%
  bold_labels()
tbl_uvregression
Characteristic N Event N OR1 95% CI1 p-value
Age 691 102 0.97 0.96, 0.98 <0.001
Sex 691 102


    Female


    Male

0.84 0.55, 1.30 0.44
Race 691 102


    White


    Non-White

0.76 0.39, 1.37 0.39
Ethnicity 691 102


    Hispanic or Latino


    Not Hispanic/Latino or Unknown

0.55 0.28, 1.12 0.083
Prior HCT 691 102


    No


    Yes

1.25 0.80, 1.92 0.31
Disease 691 102


    Indolent NHL


    Aggressive NHL

0.88 0.49, 1.62 0.67
    ALL

4.06 2.18, 7.76 <0.001
    MM/PCL

0.79 0.36, 1.68 0.55
Lymphodepletion regimen 691 102


    Low-intensity Cy/Flu


    High-intensity Cy/Flu

1.45 0.93, 2.25 0.10
    Other

2.44 1.03, 5.35 0.031
Lymphodepletion regimen (low-dose Cy/Flu vs. other) 691 102


    Low-intensity Cy/Flu


    Other

1.56 1.02, 2.38 0.040
CAR target 691 102


    BCMA


    CD19

1.78 0.98, 3.54 0.076
    CD20

0.64 0.14, 2.15 0.51
Costimulatory domain 691 102


    4-1BB


    CD28

1.02 0.63, 1.62 0.93
    CD28-41BB

0.40 0.09, 1.13 0.13
CRS grade 691 102 2.01 1.60, 2.56 <0.001
ICANS grade 691 102 1.51 1.29, 1.76 <0.001
Pre-LD ANC 690 102 0.15 0.08, 0.24 <0.001
Pre-LD Hb 691 102 0.61 0.52, 0.70 <0.001
Pre-LD platelet count 691 102 0.06 0.03, 0.11 <0.001
Pre-LD LDH 677 101 9.95 4.72, 21.5 <0.001
Pre-LD CRP 439 58 3.44 2.24, 5.43 <0.001
Day +0 CRP 644 85 3.24 2.21, 4.84 <0.001
Day +3 CRP 643 91 3.54 2.30, 5.65 <0.001
Day +5 CRP 444 81 3.36 2.10, 5.57 <0.001
Day +7 CRP 549 77 3.73 2.36, 6.08 <0.001
Peak CRP 683 98 16.7 7.91, 38.4 <0.001
Pre-LD ferritin 486 69 5.57 3.42, 9.51 <0.001
Day +0 ferritin 658 90 9.87 5.82, 17.5 <0.001
Day +3 ferritin 649 94 9.24 5.65, 15.8 <0.001
Day +5 ferritin 445 80 6.74 4.17, 11.5 <0.001
Day +7 ferritin 555 81 5.49 3.65, 8.57 <0.001
Peak ferritin 691 102 6.90 4.80, 10.2 <0.001
Pre-LD IL-6 130 14 4.13 1.58, 11.4 0.004
Day +0 IL-6 336 41 2.95 1.68, 5.24 <0.001
Day +3 IL-6 476 71 1.44 1.11, 1.86 0.005
Day +5 IL-6 356 60 1.56 1.18, 2.07 0.002
Day +7 IL-6 413 59 2.13 1.53, 2.99 <0.001
Peak IL-6 551 79 2.60 2.00, 3.43 <0.001
Pre-LD fibrinogen 378 60 3.87 0.53, 29.4 0.19
Day +0 fibrinogen 606 79 7.19 1.13, 46.4 0.037
Day +3 fibrinogen 613 85 3.85 0.66, 23.2 0.14
Day +5 fibrinogen 404 69 0.80 0.16, 4.14 0.79
Day +7 fibrinogen 508 70 0.68 0.17, 2.78 0.58
Nadir fibrinogen 661 94 0.07 0.03, 0.18 <0.001
Pre-LD D-dimer 341 57 2.97 1.50, 5.92 0.002
Day +0 D-dimer 557 72 4.85 2.65, 9.00 <0.001
Day +3 D-dimer 561 79 4.20 2.41, 7.41 <0.001
Day +5 D-dimer 373 63 4.01 2.21, 7.44 <0.001
Day +7 D-dimer 455 60 3.58 2.09, 6.19 <0.001
Peak D-dimer 651 92 4.69 3.00, 7.49 <0.001
Pre-LD PTT 455 71 5.04 0.18, 114 0.32
Day +0 PTT 502 69 7.63 0.43, 111 0.14
Day +3 PTT 494 74 7.22 1.05, 45.2 0.038
Day +5 PTT 298 55 4.70 0.78, 26.0 0.079
Day +7 PTT 402 59 4.81 0.67, 30.0 0.10
Peak PTT 606 91 9.09 2.90, 27.9 <0.001
1 OR = Odds Ratio, CI = Confidence Interval
Show code
df_regression <- tbl_uvregression$table_body %>% 
  mutate(color = case_when(estimate < 1 & p.value < 0.05 ~ "Low",
                           estimate > 1 & p.value < 0.05 ~ "High",
                           .default = "Other"),
         color = as.factor(color))

vp <-
  ggplot(data = df_regression, aes(x = estimate, y = -log2(p.value), color = color)) +
  geom_point() +
  scale_color_manual(values = c("red", "darkgreen", "darkgray")) + 
  geom_hline(yintercept = -log2(0.05), color = "black", linetype = "dashed") +
  geom_hline(yintercept = -log2(0.01), color = "black", linetype = "dashed") +
  geom_vline(xintercept = 1, color = "black", linetype = "dashed") +
  geom_text(aes(x = 19, y = 3, label = "p = 0.05"), color = "black", stat = "unique") +
  geom_text(aes(x = 19, y = 8, label = "p = 0.01"), color = "black", stat = "unique") +
  geom_text(aes(x = 2, y = 75, label = "OR = 1"), color = "black", stat = "unique") +
  scale_x_continuous(name = "Odds of grade 3-4 ICAHT", limits = c(0, 20)) +
  coord_cartesian(clip = "off") +
  theme(legend.position = "none")
ggplotly(vp)
Show code
options(ggrepel.max.overlaps = Inf)
vp + geom_text_repel(data = filter(df_regression, p.value < 0.05),
                  aes(label = var_label))

CAR-HEMATOTOX score

Compute CAR-HEMATOTOX scores

Show code
# Impute missing data
df_car_hematotox <- df %>%
  select(upn,
         icaht_grade_3_4,
         anc_pre_ld,
         hb_pre_ld,
         plt_pre_ld,
         crp_pre_ld,
         ferritin_pre_ld) 

recipe <- recipe(icaht_grade_3_4 ~ ., data = df_car_hematotox) %>%
  update_role(upn, new_role = "ID") %>%
  update_role_requirements("ID", bake = FALSE) %>%
  step_impute_bag(anc_pre_ld,
                  hb_pre_ld,
                  plt_pre_ld,
                  crp_pre_ld,
                  ferritin_pre_ld,
                  seed_val = 100) 

# Extract transformed dataframe
df_car_hematotox <- recipe %>% 
  prep() %>% 
  bake(new_data = NULL)

# Calculate CAR-HEMATOTOX score (continuous)
df_car_hematotox <- df_car_hematotox %>%
  mutate(
    plt_hematotox_score = case_when(
      plt_pre_ld > 175 ~ 0,
      plt_pre_ld %in% c(75:175) ~ 1,
      plt_pre_ld < 75 ~ 2,
      is.na(plt_pre_ld) ~ NA_real_
    ),
    anc_hematotox_score = case_when(
      anc_pre_ld > 1.2 ~ 0,
      anc_pre_ld < 1.2 ~ 1,
      is.na(anc_pre_ld) ~ NA_real_
    ),
    hb_hematotox_score = case_when(hb_pre_ld > 9 ~ 0,
                                   hb_pre_ld < 9 ~ 1,
                                   is.na(hb_pre_ld) ~ NA_real_),
    crp_hematotox_score = case_when(crp_pre_ld < 3 ~ 0,
                                    crp_pre_ld > 3 ~ 1,
                                    is.na(crp_pre_ld) ~ NA_real_),
    ferritin_hematotox_score = case_when(
      ferritin_pre_ld < 650 ~ 0,
      ferritin_pre_ld %in% c(650:2000) ~ 1,
      ferritin_pre_ld > 2000 ~ 2,
      is.na(ferritin_pre_ld) ~ NA_real_
    )
  )

# Calculate CAR-HEMATOTOX score (categorical)
df_car_hematotox <- df_car_hematotox %>%
  mutate(
    car_hematotox_score = rowSums(across(
      plt_hematotox_score:ferritin_hematotox_score
    )),
    car_hematotox_categorical = case_when(car_hematotox_score < 2 ~ "Low",
                                        car_hematotox_score >= 2 ~ "High"),
    car_hematotox_categorical = as.factor(car_hematotox_categorical),
    car_hematotox_categorical = fct_relevel(car_hematotox_categorical, c("Low", "High"))
  )

var_label(df_car_hematotox$car_hematotox_score) <- "CAR-HEMATOTOX score (continuous)"
var_label(df_car_hematotox$car_hematotox_categorical) <- "CAR-HEMATOTOX score (categorical)"

Confusion matrix for CAR-HEMATOTOX score in predicting grade 3-4 ICAHT

Show code
df_matrix <- df_car_hematotox %>% 
  select(upn, icaht_grade_3_4, car_hematotox_categorical) %>% 
  mutate(car_hematotox_high = ifelse(car_hematotox_categorical == "High", 1, 0)) %>% 
  drop_na()

expected_value <- factor(df_matrix$icaht_grade_3_4, levels = c("1", "0"))
predicted_value <- factor(df_matrix$car_hematotox_high, levels = c("1", "0"))

matrix <-
  confusionMatrix(data = predicted_value,
                  reference = expected_value,
                  positive = "1")
matrix
Confusion Matrix and Statistics

          Reference
Prediction   1   0
         1  86 301
         0   5 234
                                         
               Accuracy : 0.5112         
                 95% CI : (0.4713, 0.551)
    No Information Rate : 0.8546         
    P-Value [Acc > NIR] : 1              
                                         
                  Kappa : 0.1628         
                                         
 Mcnemar's Test P-Value : <2e-16         
                                         
            Sensitivity : 0.9451         
            Specificity : 0.4374         
         Pos Pred Value : 0.2222         
         Neg Pred Value : 0.9791         
             Prevalence : 0.1454         
         Detection Rate : 0.1374         
   Detection Prevalence : 0.6182         
      Balanced Accuracy : 0.6912         
                                         
       'Positive' Class : 1              
                                         

Calculate sample size required for predicting grade 3-4 ICAHT (based on Riley et al., BMJ 2020)

Show code
# Set Cox-Snell R squared = 0.1
# 6 predictors
pmsampsize(
  type = "b",
  nagrsquared = NA,
  csrsquared = 0.1,
  rsquared = NA,
  parameters = 6,
  shrinkage = 0.9,
  prevalence = 0.16,
  cstatistic = NA,
  seed = 123,
  rate = NA,
  timepoint = NA,
  meanfup = NA,
  intercept = NA,
  sd = NA,
  mmoe = 1.1)
NB: Assuming 0.05 acceptable difference in apparent & adjusted R-squared 
NB: Assuming 0.05 margin of error in estimation of intercept 
NB: Events per Predictor Parameter (EPP) assumes prevalence = 0.16  
 
           Samp_size Shrinkage Parameter CS_Rsq Max_Rsq Nag_Rsq   EPP
Criteria 1       510     0.900         6    0.1   0.585   0.171 13.60
Criteria 2       192     0.774         6    0.1   0.585   0.171  5.12
Criteria 3       207     0.900         6    0.1   0.585   0.171  5.52
Final            510     0.900         6    0.1   0.585   0.171 13.60
 
 Minimum sample size required for new model development based on user inputs = 510, 
 with 82 events (assuming an outcome prevalence = 0.16) and an EPP = 13.6 
 
 

Development and evaluation of multivariable logistic regression model of grade 3-4 ICAHT: disease type, pre-LD ANC, pre-LD platelet count, pre-LD LDH, and pre-LD ferritin

Model development and evaluation using {tidymodels}

Create training and tests sets (70%/30%)

Show code
# Create data frame for tidy model
df_tidy <- df %>%
  mutate(
    disease_cat = ifelse(disease_cat == "ALL", "ALL", "Other"),
    disease_cat = factor(disease_cat, levels = c("ALL", "Other"))
  ) %>%
  select(
    upn,
    icaht_grade_3_4,
    disease_cat,
    LD_regimen_low_CyFlu_vs_other,
    anc_pre_ld_log10,
    plt_pre_ld_log10,
    ldh_pre_ld_log10,
    ferritin_pre_ld_log10,
    ferritin_day_0_log10,
    ferritin_day_3_log10,
    crp_day_3_log10,
    d_dimer_day_3_log10
  ) %>%
  mutate(
    icaht_grade_3_4 = as.factor(icaht_grade_3_4),
    icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1"))
  )

# Split df into training set and testing set
set.seed(100)
df_split <-
  initial_split(df_tidy, prop = 0.7, strata = icaht_grade_3_4)
df_train <- training(df_split)
df_test <- testing(df_split)

df_train %>%
  count(icaht_grade_3_4) %>%
  mutate(prop = n / sum(n))
# A tibble: 2 × 3
# Rowwise: 
  icaht_grade_3_4     n  prop
  <fct>           <int> <dbl>
1 0                 412     1
2 1                  71     1
Show code
df_test %>%
  count(icaht_grade_3_4) %>%
  mutate(prop = n / sum(n))
# A tibble: 2 × 3
# Rowwise: 
  icaht_grade_3_4     n  prop
  <fct>           <int> <dbl>
1 0                 177     1
2 1                  31     1

Build and train logistic regression model with LASSO regularization

Show code
# Create model recipe to pre-process data
recipe <- recipe(icaht_grade_3_4 ~ ., data = df_train) %>%
  update_role(upn, new_role = "ID") %>% # Assign upn to identifier role (as opposed to predictor or outcome)
  update_role_requirements("ID", bake = FALSE) %>% 
  step_impute_bag(
    # Impute missing values (default is to use all other predictors)
    disease_cat,
    LD_regimen_low_CyFlu_vs_other,
    anc_pre_ld_log10,
    plt_pre_ld_log10,
    ldh_pre_ld_log10,
    ferritin_pre_ld_log10,
    ferritin_day_0_log10,
    ferritin_day_3_log10,
    crp_day_3_log10,
    d_dimer_day_3_log10,
    seed_val = 100
  ) %>%
  # step_zv(all_numeric(), -all_outcomes()) %>% # Remove variables containing only a single value
  step_normalize(all_numeric(), -all_outcomes()) %>% # Normalize numeric data
  step_dummy(disease_cat, LD_regimen_low_CyFlu_vs_other) # Create dummy variables for disease_cat (nominal to numeric binary)

# Build model specification
spec_model <- logistic_reg(penalty = 0.1, mixture = 1) %>%
  set_engine("glmnet")

# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_pre_ld_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training data
fit_test <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_test %>%
  extract_fit_parsnip() %>%
  tidy()
# A tibble: 6 × 3
  term                  estimate penalty
  <chr>                    <dbl>   <dbl>
1 (Intercept)           -1.77        0.1
2 disease_cat_Other      0           0.1
3 anc_pre_ld_log10      -0.0668      0.1
4 plt_pre_ld_log10      -0.135       0.1
5 ldh_pre_ld_log10       0           0.1
6 ferritin_pre_ld_log10  0.00617     0.1

Tune LASSO parameters

Show code
# Build set of bootstrap resamples
set.seed(123)
bootstrap_resamples <-
  bootstraps(df_train, strata = icaht_grade_3_4)

# Create tuning specifications
# Set penalty = tune() instead of to a single value
tune_spec <- logistic_reg(penalty = tune(), mixture = 1) %>%
  set_engine("glmnet")

# Use penalty() to set up an appropriate grid
lambda_grid <- grid_regular(penalty(), levels = 5)

# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

# lasso_grid <- tune_grid(
#   wf %>% update_model(tune_spec, formula = model_formula),
#   resamples = bootstrap_resamples,
#   grid = lambda_grid
# )

lasso_grid %>% collect_metrics()
# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.874     25 0.00299 Preproces…
 2 0.0000000001 brier_class binary     0.0999    25 0.00189 Preproces…
 3 0.0000000001 roc_auc     binary     0.777     25 0.00826 Preproces…
 4 0.0000000316 accuracy    binary     0.874     25 0.00299 Preproces…
 5 0.0000000316 brier_class binary     0.0999    25 0.00189 Preproces…
 6 0.0000000316 roc_auc     binary     0.777     25 0.00826 Preproces…
 7 0.00001      accuracy    binary     0.874     25 0.00299 Preproces…
 8 0.00001      brier_class binary     0.0999    25 0.00189 Preproces…
 9 0.00001      roc_auc     binary     0.777     25 0.00826 Preproces…
10 0.00316      accuracy    binary     0.875     25 0.00293 Preproces…
11 0.00316      brier_class binary     0.0997    25 0.00186 Preproces…
12 0.00316      roc_auc     binary     0.778     25 0.00829 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…
Show code
# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")
Show code
# Select highest AUROC
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code
# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained
══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.123300
2   2  2.56 0.112400
3   2  4.94 0.102400
4   3  7.08 0.093280
5   4  9.27 0.084990
6   4 11.39 0.077440
7   4 13.12 0.070560
8   4 14.56 0.064300
9   4 15.77 0.058580
10  4 16.79 0.053380
11  4 17.66 0.048640
12  4 18.40 0.044320
13  4 19.04 0.040380
14  4 19.58 0.036790
15  4 20.05 0.033520
16  4 20.45 0.030550
17  4 20.79 0.027830
18  4 21.08 0.025360
19  5 21.33 0.023110
20  5 21.56 0.021050
21  5 21.76 0.019180
22  5 21.92 0.017480
23  5 22.07 0.015930
24  5 22.19 0.014510
25  5 22.29 0.013220
26  5 22.38 0.012050
27  5 22.45 0.010980
28  5 22.51 0.010000
29  5 22.56 0.009114
30  5 22.61 0.008304
31  5 22.64 0.007566
32  5 22.68 0.006894
33  5 22.70 0.006282
34  5 22.72 0.005724
35  5 22.74 0.005215
36  5 22.76 0.004752
37  5 22.77 0.004330
38  5 22.78 0.003945
39  5 22.79 0.003595
40  5 22.80 0.003275
41  5 22.80 0.002984
42  5 22.81 0.002719
43  5 22.81 0.002478
44  5 22.82 0.002258
45  5 22.82 0.002057
46  5 22.82 0.001874

...
and 6 more lines.
Show code
# saveRDS(wf_final_trained, "eIPMPre.rds")

# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod
parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.123300
2   2  2.56 0.112400
3   2  4.94 0.102400
4   3  7.08 0.093280
5   4  9.27 0.084990
6   4 11.39 0.077440
7   4 13.12 0.070560
8   4 14.56 0.064300
9   4 15.77 0.058580
10  4 16.79 0.053380
11  4 17.66 0.048640
12  4 18.40 0.044320
13  4 19.04 0.040380
14  4 19.58 0.036790
15  4 20.05 0.033520
16  4 20.45 0.030550
17  4 20.79 0.027830
18  4 21.08 0.025360
19  5 21.33 0.023110
20  5 21.56 0.021050
21  5 21.76 0.019180
22  5 21.92 0.017480
23  5 22.07 0.015930
24  5 22.19 0.014510
25  5 22.29 0.013220
26  5 22.38 0.012050
27  5 22.45 0.010980
28  5 22.51 0.010000
29  5 22.56 0.009114
30  5 22.61 0.008304
31  5 22.64 0.007566
32  5 22.68 0.006894
33  5 22.70 0.006282
34  5 22.72 0.005724
35  5 22.74 0.005215
36  5 22.76 0.004752
37  5 22.77 0.004330
38  5 22.78 0.003945
39  5 22.79 0.003595
40  5 22.80 0.003275
41  5 22.80 0.002984
42  5 22.81 0.002719
43  5 22.81 0.002478
44  5 22.82 0.002258
45  5 22.82 0.002057
46  5 22.82 0.001874
47  5 22.83 0.001708
48  5 22.83 0.001556
49  5 22.83 0.001418
50  5 22.83 0.001292
51  5 22.83 0.001177
52  5 22.83 0.001073
Show code
# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs
# A tibble: 6 × 3
  term                  estimate penalty
  <chr>                    <dbl>   <dbl>
1 (Intercept)             -1.98  0.00316
2 disease_cat_Other       -0.207 0.00316
3 anc_pre_ld_log10        -0.534 0.00316
4 plt_pre_ld_log10        -0.278 0.00316
5 ldh_pre_ld_log10         0.477 0.00316
6 ferritin_pre_ld_log10    0.460 0.00316
Show code
# # Test model against Shiny app output
# df_test <- tibble(
#   disease_cat = "ALL",
#   LD_regimen_low_CyFlu_vs_other = NA,
#   anc_pre_ld = 1.5,
#   plt_pre_ld = 175,
#   ldh_pre_ld = 250,
#   ferritin_pre_ld = 300,
#   ferritin_day_0 = NA,
#   ferritin_day_3 = NA,
#   crp_day_3 = NA,
#   d_dimer_day_3 = NA
# )
# 
# df_test <- df_test %>%
#   mutate(
#     across(anc_pre_ld:d_dimer_day_3, ~ ifelse(. == 0, 0.01, .), .names = "{.col}_log10"),
#     across(anc_pre_ld_log10:d_dimer_day_3_log10, log10)
#   )
# 
# predict(wf_final_trained, df_test, type = "prob")

# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()
Show code
# Evaluate final model on test set
final_fit %>% collect_metrics()
# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.885  Preprocessor1_Model1
2 roc_auc     binary        0.873  Preprocessor1_Model1
3 brier_class binary        0.0868 Preprocessor1_Model1
Show code
# Plot calibration curve for test set using {runway}
# Augment df_test (i.e., add columns for predictions to df_train)
df_mod_pre_ferritin_pre_ld <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_pre_ferritin_pre_ld <- cal_plot(
  df_mod_pre_ferritin_pre_ld,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPre"
)
p_cal_pre_ferritin_pre_ld
Show code
# Plot ROC curve for test set
p_roc_pre_ferritin_pre_ld <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPre")
p_roc_pre_ferritin_pre_ld

Calculate sensitivity/specificity of final model on test dataset using an optimal cutpoint based on the Youden index

Show code
cp <- cutpointr::cutpointr(
  data = df_mod_pre_ferritin_pre_ld,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)
Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

   AUC   n n_pos n_neg
 0.873 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.1176 0.6191 0.7212      0.9355      0.6836 29  2 56 121

Predictor summary: 
    Data       Min.         5%    1st Qu.     Median      Mean
 Overall 0.01479182 0.02326008 0.04465773 0.09131790 0.1567422
       0 0.01479182 0.02211389 0.03930081 0.07594596 0.1143297
       1 0.05917070 0.11217653 0.17613996 0.29002852 0.3989035
   3rd Qu.       95%      Max.        SD NAs
 0.1948782 0.6011735 0.9946637 0.1795614   0
 0.1519761 0.3298866 0.6286539 0.1082159   0
 0.6252787 0.9019418 0.9946637 0.2873314   0

Decision curve analysis of final model fitted on test dataset

Show code
df_mod_pre_ferritin_pre_ld <- df_mod_pre_ferritin_pre_ld %>% 
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(icaht_grade_3_4 ~ .pred_1, data = df_mod_pre_ferritin_pre_ld,
    thresholds = seq(0, 0.35, by = 0.01)) %>% 
  plot(smooth = TRUE)

Development and evaluation of multivariable logistic regression model of grade 3-4 ICAHT: disease type, lymphodepletion regimen, pre-LD ANC, pre-LD platelet count, pre-LD LDH, and pre-LD ferritin

Model development and evaluation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code
# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + LD_regimen_low_CyFlu_vs_other_Other + anc_pre_ld_log10 +
  plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_pre_ld_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training set
fit_test <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_test %>%
  extract_fit_parsnip() %>%
  tidy()
# A tibble: 7 × 3
  term                                estimate penalty
  <chr>                                  <dbl>   <dbl>
1 (Intercept)                         -1.77        0.1
2 disease_cat_Other                    0           0.1
3 LD_regimen_low_CyFlu_vs_other_Other  0           0.1
4 anc_pre_ld_log10                    -0.0668      0.1
5 plt_pre_ld_log10                    -0.135       0.1
6 ldh_pre_ld_log10                     0           0.1
7 ferritin_pre_ld_log10                0.00617     0.1

Tune LASSO parameters

Show code
# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()
# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.875     25 0.00304 Preproces…
 2 0.0000000001 brier_class binary     0.0965    25 0.00187 Preproces…
 3 0.0000000001 roc_auc     binary     0.794     25 0.00790 Preproces…
 4 0.0000000316 accuracy    binary     0.875     25 0.00304 Preproces…
 5 0.0000000316 brier_class binary     0.0965    25 0.00187 Preproces…
 6 0.0000000316 roc_auc     binary     0.794     25 0.00790 Preproces…
 7 0.00001      accuracy    binary     0.875     25 0.00304 Preproces…
 8 0.00001      brier_class binary     0.0965    25 0.00187 Preproces…
 9 0.00001      roc_auc     binary     0.794     25 0.00790 Preproces…
10 0.00316      accuracy    binary     0.876     25 0.00306 Preproces…
11 0.00316      brier_class binary     0.0965    25 0.00184 Preproces…
12 0.00316      roc_auc     binary     0.792     25 0.00802 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…
Show code
# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")
Show code
# Select highest accuracy
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code
# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained
══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.123300
2   2  2.56 0.112400
3   2  4.94 0.102400
4   3  7.08 0.093280
5   4  9.27 0.084990
6   4 11.39 0.077440
7   4 13.12 0.070560
8   4 14.56 0.064300
9   4 15.77 0.058580
10  4 16.79 0.053380
11  4 17.66 0.048640
12  4 18.40 0.044320
13  5 19.27 0.040380
14  5 20.07 0.036790
15  5 20.76 0.033520
16  5 21.36 0.030550
17  5 21.87 0.027830
18  5 22.31 0.025360
19  6 22.71 0.023110
20  6 23.06 0.021050
21  6 23.35 0.019180
22  6 23.60 0.017480
23  6 23.82 0.015930
24  6 24.00 0.014510
25  6 24.16 0.013220
26  6 24.29 0.012050
27  6 24.41 0.010980
28  6 24.50 0.010000
29  6 24.58 0.009114
30  6 24.65 0.008304
31  6 24.71 0.007566
32  6 24.76 0.006894
33  6 24.80 0.006282
34  6 24.83 0.005724
35  6 24.86 0.005215
36  6 24.89 0.004752
37  6 24.91 0.004330
38  6 24.92 0.003945
39  6 24.94 0.003595
40  6 24.95 0.003275
41  6 24.96 0.002984
42  6 24.97 0.002719
43  6 24.98 0.002478
44  6 24.98 0.002258
45  6 24.99 0.002057
46  6 24.99 0.001874

...
and 8 more lines.
Show code
# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod
parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.123300
2   2  2.56 0.112400
3   2  4.94 0.102400
4   3  7.08 0.093280
5   4  9.27 0.084990
6   4 11.39 0.077440
7   4 13.12 0.070560
8   4 14.56 0.064300
9   4 15.77 0.058580
10  4 16.79 0.053380
11  4 17.66 0.048640
12  4 18.40 0.044320
13  5 19.27 0.040380
14  5 20.07 0.036790
15  5 20.76 0.033520
16  5 21.36 0.030550
17  5 21.87 0.027830
18  5 22.31 0.025360
19  6 22.71 0.023110
20  6 23.06 0.021050
21  6 23.35 0.019180
22  6 23.60 0.017480
23  6 23.82 0.015930
24  6 24.00 0.014510
25  6 24.16 0.013220
26  6 24.29 0.012050
27  6 24.41 0.010980
28  6 24.50 0.010000
29  6 24.58 0.009114
30  6 24.65 0.008304
31  6 24.71 0.007566
32  6 24.76 0.006894
33  6 24.80 0.006282
34  6 24.83 0.005724
35  6 24.86 0.005215
36  6 24.89 0.004752
37  6 24.91 0.004330
38  6 24.92 0.003945
39  6 24.94 0.003595
40  6 24.95 0.003275
41  6 24.96 0.002984
42  6 24.97 0.002719
43  6 24.98 0.002478
44  6 24.98 0.002258
45  6 24.99 0.002057
46  6 24.99 0.001874
47  6 24.99 0.001708
48  6 25.00 0.001556
49  6 25.00 0.001418
50  6 25.00 0.001292
51  6 25.00 0.001177
52  6 25.00 0.001073
53  6 25.00 0.000977
54  6 25.01 0.000890
Show code
# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs
# A tibble: 7 × 3
  term                                estimate      penalty
  <chr>                                  <dbl>        <dbl>
1 (Intercept)                           -2.40  0.0000000001
2 disease_cat_Other                     -0.276 0.0000000001
3 LD_regimen_low_CyFlu_vs_other_Other    0.885 0.0000000001
4 anc_pre_ld_log10                      -0.546 0.0000000001
5 plt_pre_ld_log10                      -0.377 0.0000000001
6 ldh_pre_ld_log10                       0.461 0.0000000001
7 ferritin_pre_ld_log10                  0.451 0.0000000001
Show code
# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()
Show code
# Evaluate final model on test set
final_fit %>% collect_metrics()
# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.885  Preprocessor1_Model1
2 roc_auc     binary        0.840  Preprocessor1_Model1
3 brier_class binary        0.0906 Preprocessor1_Model1
Show code
# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )
Show code
# Using {runway}
# Augment df_test (i.e., add columns for predictions to df_train)
df_mod_pre_ferritin_pre_ld_LD_regimen <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_pre_ferritin_pre_ld_LD_regimen <- cal_plot(
  df_mod_pre_ferritin_pre_ld_LD_regimen,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPre (+ LD regimen)"
)
p_cal_pre_ferritin_pre_ld_LD_regimen
Show code
# Plot ROC curve for test set
p_roc_pre_ferritin_pre_ld_LD_regimen <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPre (+ LD regimen)")
p_roc_pre_ferritin_pre_ld_LD_regimen

Calculate sensitivity/specificity of final model on test dataset using an optimal cutpoint based on the Youden index

Show code
cp <- cutpointr::cutpointr(
  data = df_mod_pre_ferritin_pre_ld_LD_regimen,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)
Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8402 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.1315 0.5619 0.7404      0.8387      0.7232 26  5 49 128

Predictor summary: 
    Data       Min.         5%    1st Qu.     Median      Mean
 Overall 0.01060906 0.01585109 0.03924637 0.08963944 0.1546460
       0 0.01060906 0.01439415 0.03395278 0.07364204 0.1123256
       1 0.03593784 0.07432365 0.13772072 0.27499010 0.3962817
   3rd Qu.       95%      Max.        SD NAs
 0.1747355 0.5596489 0.9966204 0.1895385   0
 0.1435229 0.3445912 0.7379206 0.1168601   0
 0.6503889 0.9099296 0.9966204 0.3112291   0

Decision curve analysis of final model fitted on test set

Show code
df_mod_pre_ferritin_pre_ld_LD_regimen <- df_mod_pre_ferritin_pre_ld_LD_regimen %>%
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(
  icaht_grade_3_4 ~ .pred_1,
  data = df_mod_pre_ferritin_pre_ld_LD_regimen,
  thresholds = seq(0, 0.35, by = 0.01),
  label = list(.pred_1 = "eIPMPre")
) %>%
  plot(smooth = TRUE)

Development and evaluation of multivariable logistic regression model of grade 3-4 ICAHT: disease type, pre-LD ANC, pre-LD platelet count, pre-LD LDH, and day +3 ferritin

Model development and evaluation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code
# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_day_3_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training data
fit_test <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_test %>%
  extract_fit_parsnip() %>%
  tidy()
# A tibble: 6 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)           -1.78       0.1
2 disease_cat_Other      0          0.1
3 anc_pre_ld_log10      -0.0683     0.1
4 plt_pre_ld_log10      -0.0710     0.1
5 ldh_pre_ld_log10       0          0.1
6 ferritin_day_3_log10   0.156      0.1

Tune LASSO parameters

Show code
# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()
# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.873     25 0.00397 Preproces…
 2 0.0000000001 brier_class binary     0.0973    25 0.00213 Preproces…
 3 0.0000000001 roc_auc     binary     0.814     25 0.00761 Preproces…
 4 0.0000000316 accuracy    binary     0.873     25 0.00397 Preproces…
 5 0.0000000316 brier_class binary     0.0973    25 0.00213 Preproces…
 6 0.0000000316 roc_auc     binary     0.814     25 0.00761 Preproces…
 7 0.00001      accuracy    binary     0.873     25 0.00397 Preproces…
 8 0.00001      brier_class binary     0.0973    25 0.00213 Preproces…
 9 0.00001      roc_auc     binary     0.814     25 0.00761 Preproces…
10 0.00316      accuracy    binary     0.875     25 0.00332 Preproces…
11 0.00316      brier_class binary     0.0970    25 0.00208 Preproces…
12 0.00316      roc_auc     binary     0.814     25 0.00763 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…
Show code
# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")
Show code
# Select highest AUROC
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code
# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained
══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.125700
2   3  3.24 0.114500
3   3  6.65 0.104300
4   3  9.32 0.095060
5   3 11.47 0.086610
6   4 13.36 0.078920
7   4 15.28 0.071910
8   4 16.88 0.065520
9   4 18.21 0.059700
10  4 19.34 0.054400
11  4 20.30 0.049560
12  4 21.12 0.045160
13  4 21.82 0.041150
14  4 22.43 0.037490
15  4 22.94 0.034160
16  4 23.39 0.031130
17  4 23.77 0.028360
18  4 24.09 0.025840
19  4 24.37 0.023550
20  4 24.61 0.021450
21  4 24.82 0.019550
22  5 25.00 0.017810
23  5 25.16 0.016230
24  5 25.30 0.014790
25  5 25.42 0.013470
26  5 25.51 0.012280
27  5 25.60 0.011190
28  5 25.67 0.010190
29  5 25.73 0.009287
30  5 25.78 0.008462
31  5 25.82 0.007710
32  5 25.86 0.007025
33  5 25.89 0.006401
34  5 25.92 0.005833
35  5 25.94 0.005315
36  5 25.96 0.004842
37  5 25.97 0.004412
38  5 25.98 0.004020
39  5 26.00 0.003663
40  5 26.00 0.003338
41  5 26.01 0.003041
42  5 26.02 0.002771
43  5 26.02 0.002525
44  5 26.03 0.002301
45  5 26.03 0.002096
46  5 26.03 0.001910

...
and 7 more lines.
Show code
# saveRDS(wf_final_trained, "eIPMPost.rds")

# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod
parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.125700
2   3  3.24 0.114500
3   3  6.65 0.104300
4   3  9.32 0.095060
5   3 11.47 0.086610
6   4 13.36 0.078920
7   4 15.28 0.071910
8   4 16.88 0.065520
9   4 18.21 0.059700
10  4 19.34 0.054400
11  4 20.30 0.049560
12  4 21.12 0.045160
13  4 21.82 0.041150
14  4 22.43 0.037490
15  4 22.94 0.034160
16  4 23.39 0.031130
17  4 23.77 0.028360
18  4 24.09 0.025840
19  4 24.37 0.023550
20  4 24.61 0.021450
21  4 24.82 0.019550
22  5 25.00 0.017810
23  5 25.16 0.016230
24  5 25.30 0.014790
25  5 25.42 0.013470
26  5 25.51 0.012280
27  5 25.60 0.011190
28  5 25.67 0.010190
29  5 25.73 0.009287
30  5 25.78 0.008462
31  5 25.82 0.007710
32  5 25.86 0.007025
33  5 25.89 0.006401
34  5 25.92 0.005833
35  5 25.94 0.005315
36  5 25.96 0.004842
37  5 25.97 0.004412
38  5 25.98 0.004020
39  5 26.00 0.003663
40  5 26.00 0.003338
41  5 26.01 0.003041
42  5 26.02 0.002771
43  5 26.02 0.002525
44  5 26.03 0.002301
45  5 26.03 0.002096
46  5 26.03 0.001910
47  5 26.04 0.001740
48  5 26.04 0.001586
49  5 26.04 0.001445
50  5 26.04 0.001316
51  5 26.04 0.001199
52  5 26.04 0.001093
53  5 26.05 0.000996
Show code
# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs
# A tibble: 6 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)            -2.10  0.00316
2 disease_cat_Other      -0.181 0.00316
3 anc_pre_ld_log10       -0.547 0.00316
4 plt_pre_ld_log10       -0.204 0.00316
5 ldh_pre_ld_log10        0.448 0.00316
6 ferritin_day_3_log10    0.752 0.00316
Show code
# # Test model against Shiny app output
# df_test <- tibble(
#   disease_cat = "ALL",
#   LD_regimen_low_CyFlu_vs_other = NA,
#   anc_pre_ld = 1.5,
#   plt_pre_ld = 175,
#   ldh_pre_ld = 250,
#   ferritin_pre_ld = NA,
#   ferritin_day_0 = NA,
#   ferritin_day_3 = 300,
#   crp_day_3 = NA,
#   d_dimer_day_3 = NA
# )
# 
# df_test <- df_test %>%
#   mutate(
#     across(anc_pre_ld:d_dimer_day_3, ~ ifelse(. == 0, 0.01, .), .names = "{.col}_log10"),
#     across(anc_pre_ld_log10:d_dimer_day_3_log10, log10)
#   )
# 
# predict(wf_final_trained, df_test, type = "prob")

# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()
Show code
# Evaluate final model on test set
final_fit %>% collect_metrics()
# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.889  Preprocessor1_Model1
2 roc_auc     binary        0.879  Preprocessor1_Model1
3 brier_class binary        0.0846 Preprocessor1_Model1
Show code
# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )
Show code
# Using {runway}
# Augment df_test (i.e., add columns for predictions to df_train)
df_mod_post_ferritin_only <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_post_ferritin_only <- cal_plot(
  df_mod_post_ferritin_only,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPost"
)
p_cal_post_ferritin_only
Show code
# Plot ROC curve for test set
p_roc_post_ferritin_only <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPost")
p_roc_post_ferritin_only

Calculate sensitivity/specificity of final model on test dataset using an optimal cutpoint based on the Youden index

Show code
cp <- cutpointr::cutpointr(
  data = df_mod_post_ferritin_only,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)
Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8788 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.1534 0.6127 0.7837      0.8387       0.774 26  5 40 137

Predictor summary: 
    Data        Min.         5%    1st Qu.     Median      Mean
 Overall 0.006755491 0.01693932 0.03906943 0.09193796 0.1566729
       0 0.006755491 0.01619801 0.03537370 0.07525395 0.1110887
       1 0.050928947 0.10428795 0.17604793 0.29682932 0.4169440
   3rd Qu.       95%      Max.        SD NAs
 0.1940067 0.5855342 0.9967191 0.1883115   0
 0.1356604 0.3218302 0.6542548 0.1112023   0
 0.6370743 0.9626319 0.9967191 0.2997944   0

Decision curve analysis of final model fitted on test dataset

Show code
df_mod_post_ferritin_only <- df_mod_post_ferritin_only %>% 
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(icaht_grade_3_4 ~ .pred_1, data = df_mod_post_ferritin_only,
    thresholds = seq(0, 0.35, by = 0.01)) %>% 
  plot(smooth = TRUE)

Development and evaluation of multivariable logistic regression model of grade 3-4 ICAHT: disease type, pre-LD ANC, pre-LD platelet count, pre-LD LDH, day +3 ferritin, and day +3 CRP

Model development and evaluation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code
# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 +
  ferritin_day_3_log10 + crp_day_3_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training data
fit_test <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_test %>%
  extract_fit_parsnip() %>%
  tidy()
# A tibble: 7 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)           -1.78       0.1
2 disease_cat_Other      0          0.1
3 anc_pre_ld_log10      -0.0683     0.1
4 plt_pre_ld_log10      -0.0710     0.1
5 ldh_pre_ld_log10       0          0.1
6 ferritin_day_3_log10   0.156      0.1
7 crp_day_3_log10        0          0.1

Tune LASSO parameters

Show code
# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()
# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.873     25 0.00341 Preproces…
 2 0.0000000001 brier_class binary     0.0967    25 0.00208 Preproces…
 3 0.0000000001 roc_auc     binary     0.822     25 0.00747 Preproces…
 4 0.0000000316 accuracy    binary     0.873     25 0.00341 Preproces…
 5 0.0000000316 brier_class binary     0.0967    25 0.00208 Preproces…
 6 0.0000000316 roc_auc     binary     0.822     25 0.00747 Preproces…
 7 0.00001      accuracy    binary     0.873     25 0.00341 Preproces…
 8 0.00001      brier_class binary     0.0967    25 0.00208 Preproces…
 9 0.00001      roc_auc     binary     0.822     25 0.00747 Preproces…
10 0.00316      accuracy    binary     0.874     25 0.00321 Preproces…
11 0.00316      brier_class binary     0.0964    25 0.00202 Preproces…
12 0.00316      roc_auc     binary     0.822     25 0.00760 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…
Show code
# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")
Show code
# Select highest AUROC
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code
# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained
══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.125700
2   3  3.24 0.114500
3   3  6.65 0.104300
4   3  9.32 0.095060
5   3 11.47 0.086610
6   4 13.36 0.078920
7   4 15.28 0.071910
8   4 16.88 0.065520
9   4 18.21 0.059700
10  4 19.34 0.054400
11  5 20.41 0.049560
12  5 21.39 0.045160
13  5 22.24 0.041150
14  5 22.97 0.037490
15  5 23.60 0.034160
16  5 24.15 0.031130
17  5 24.62 0.028360
18  5 25.02 0.025840
19  5 25.37 0.023550
20  6 25.70 0.021450
21  6 25.98 0.019550
22  6 26.23 0.017810
23  6 26.44 0.016230
24  6 26.62 0.014790
25  6 26.77 0.013470
26  6 26.90 0.012280
27  6 27.01 0.011190
28  6 27.11 0.010190
29  6 27.19 0.009287
30  6 27.26 0.008462
31  6 27.32 0.007710
32  6 27.37 0.007025
33  6 27.41 0.006401
34  6 27.44 0.005833
35  6 27.47 0.005315
36  6 27.50 0.004842
37  6 27.52 0.004412
38  6 27.54 0.004020
39  6 27.55 0.003663
40  6 27.56 0.003338
41  6 27.57 0.003041
42  6 27.58 0.002771
43  6 27.59 0.002525
44  6 27.60 0.002301
45  6 27.60 0.002096
46  6 27.61 0.001910

...
and 8 more lines.
Show code
# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod
parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.125700
2   3  3.24 0.114500
3   3  6.65 0.104300
4   3  9.32 0.095060
5   3 11.47 0.086610
6   4 13.36 0.078920
7   4 15.28 0.071910
8   4 16.88 0.065520
9   4 18.21 0.059700
10  4 19.34 0.054400
11  5 20.41 0.049560
12  5 21.39 0.045160
13  5 22.24 0.041150
14  5 22.97 0.037490
15  5 23.60 0.034160
16  5 24.15 0.031130
17  5 24.62 0.028360
18  5 25.02 0.025840
19  5 25.37 0.023550
20  6 25.70 0.021450
21  6 25.98 0.019550
22  6 26.23 0.017810
23  6 26.44 0.016230
24  6 26.62 0.014790
25  6 26.77 0.013470
26  6 26.90 0.012280
27  6 27.01 0.011190
28  6 27.11 0.010190
29  6 27.19 0.009287
30  6 27.26 0.008462
31  6 27.32 0.007710
32  6 27.37 0.007025
33  6 27.41 0.006401
34  6 27.44 0.005833
35  6 27.47 0.005315
36  6 27.50 0.004842
37  6 27.52 0.004412
38  6 27.54 0.004020
39  6 27.55 0.003663
40  6 27.56 0.003338
41  6 27.57 0.003041
42  6 27.58 0.002771
43  6 27.59 0.002525
44  6 27.60 0.002301
45  6 27.60 0.002096
46  6 27.61 0.001910
47  6 27.61 0.001740
48  6 27.61 0.001586
49  6 27.61 0.001445
50  6 27.62 0.001316
51  6 27.62 0.001199
52  6 27.62 0.001093
53  6 27.62 0.000996
54  6 27.62 0.000907
Show code
# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs
# A tibble: 7 × 3
  term                 estimate      penalty
  <chr>                   <dbl>        <dbl>
1 (Intercept)            -2.07  0.0000000001
2 disease_cat_Other      -0.334 0.0000000001
3 anc_pre_ld_log10       -0.561 0.0000000001
4 plt_pre_ld_log10       -0.250 0.0000000001
5 ldh_pre_ld_log10        0.432 0.0000000001
6 ferritin_day_3_log10    0.553 0.0000000001
7 crp_day_3_log10         0.511 0.0000000001
Show code
# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()
Show code
# Evaluate final model on test set
final_fit %>% collect_metrics()
# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.889  Preprocessor1_Model1
2 roc_auc     binary        0.867  Preprocessor1_Model1
3 brier_class binary        0.0881 Preprocessor1_Model1
Show code
# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )
Show code
# Using {runway}
# Augment df_test (i.e., add columns for predictions to df_train)
df_mod_post_crp <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_post_crp <- cal_plot(
  df_mod_post_crp,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPost (+ day +3 CRP)"
)
p_cal_post_crp
Show code
# Plot ROC curve for test set
p_roc_post_crp <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPost (+ day +3 CRP)")
p_roc_post_crp

Calculate sensitivity/specificity of final model on test dataset using an optimal cutpoint based on the Youden index

Show code
cp <- cutpointr::cutpointr(
  data = df_mod_post_crp,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)
Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8666 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.2227 0.6063 0.8462      0.7419      0.8644 23  8 24 153

Predictor summary: 
    Data        Min.         5%    1st Qu.     Median      Mean
 Overall 0.004469136 0.01111110 0.03726027 0.08967302 0.1564522
       0 0.004469136 0.01074218 0.03214660 0.06861536 0.1116378
       1 0.049842877 0.10034928 0.19044590 0.27900757 0.4123281
   3rd Qu.       95%      Max.        SD NAs
 0.1814052 0.6403928 0.9971253 0.1941113   0
 0.1453168 0.3434017 0.7347372 0.1217198   0
 0.6948509 0.9706088 0.9971253 0.3058815   0

Decision curve analysis of final model fitted on test dataset

Show code
df_mod_post_crp <- df_mod_post_crp %>% 
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(icaht_grade_3_4 ~ .pred_1, data = df_mod_post_crp,
    thresholds = seq(0, 0.35, by = 0.01)) %>% 
  plot(smooth = TRUE)

Development and evaluation of multivariable logistic regression model of grade 3-4 ICAHT: disease type, pre-LD ANC, pre-LD platelet count, pre-LD LDH, day +3 ferritin, and day +3 D-dimer

Model development and evaluation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code
# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 +
  ferritin_day_3_log10 + d_dimer_day_3_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training data
fit_test <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_test %>%
  extract_fit_parsnip() %>%
  tidy()
# A tibble: 7 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)           -1.78       0.1
2 disease_cat_Other      0          0.1
3 anc_pre_ld_log10      -0.0683     0.1
4 plt_pre_ld_log10      -0.0710     0.1
5 ldh_pre_ld_log10       0          0.1
6 ferritin_day_3_log10   0.156      0.1
7 d_dimer_day_3_log10    0          0.1

Tune LASSO parameters

Show code
# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()
# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.874     25 0.00325 Preproces…
 2 0.0000000001 brier_class binary     0.0979    25 0.00215 Preproces…
 3 0.0000000001 roc_auc     binary     0.807     25 0.00797 Preproces…
 4 0.0000000316 accuracy    binary     0.874     25 0.00325 Preproces…
 5 0.0000000316 brier_class binary     0.0979    25 0.00215 Preproces…
 6 0.0000000316 roc_auc     binary     0.807     25 0.00797 Preproces…
 7 0.00001      accuracy    binary     0.874     25 0.00325 Preproces…
 8 0.00001      brier_class binary     0.0979    25 0.00215 Preproces…
 9 0.00001      roc_auc     binary     0.807     25 0.00797 Preproces…
10 0.00316      accuracy    binary     0.874     25 0.00325 Preproces…
11 0.00316      brier_class binary     0.0975    25 0.00209 Preproces…
12 0.00316      roc_auc     binary     0.808     25 0.00800 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…
Show code
# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")
Show code
# Select highest AUROC
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code
# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained
══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.125700
2   3  3.24 0.114500
3   3  6.65 0.104300
4   3  9.32 0.095060
5   3 11.47 0.086610
6   4 13.36 0.078920
7   4 15.28 0.071910
8   4 16.88 0.065520
9   4 18.21 0.059700
10  4 19.34 0.054400
11  4 20.30 0.049560
12  4 21.12 0.045160
13  4 21.82 0.041150
14  4 22.43 0.037490
15  4 22.94 0.034160
16  5 23.40 0.031130
17  5 23.81 0.028360
18  5 24.17 0.025840
19  5 24.47 0.023550
20  5 24.72 0.021450
21  6 24.96 0.019550
22  6 25.17 0.017810
23  6 25.35 0.016230
24  6 25.50 0.014790
25  6 25.63 0.013470
26  6 25.74 0.012280
27  6 25.84 0.011190
28  6 25.92 0.010190
29  6 25.98 0.009287
30  6 26.04 0.008462
31  6 26.09 0.007710
32  6 26.13 0.007025
33  6 26.16 0.006401
34  6 26.19 0.005833
35  6 26.22 0.005315
36  6 26.24 0.004842
37  6 26.25 0.004412
38  6 26.27 0.004020
39  6 26.28 0.003663
40  6 26.29 0.003338
41  6 26.30 0.003041
42  6 26.31 0.002771
43  6 26.31 0.002525
44  6 26.32 0.002301
45  6 26.32 0.002096
46  6 26.32 0.001910

...
and 7 more lines.
Show code
# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod
parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.125700
2   3  3.24 0.114500
3   3  6.65 0.104300
4   3  9.32 0.095060
5   3 11.47 0.086610
6   4 13.36 0.078920
7   4 15.28 0.071910
8   4 16.88 0.065520
9   4 18.21 0.059700
10  4 19.34 0.054400
11  4 20.30 0.049560
12  4 21.12 0.045160
13  4 21.82 0.041150
14  4 22.43 0.037490
15  4 22.94 0.034160
16  5 23.40 0.031130
17  5 23.81 0.028360
18  5 24.17 0.025840
19  5 24.47 0.023550
20  5 24.72 0.021450
21  6 24.96 0.019550
22  6 25.17 0.017810
23  6 25.35 0.016230
24  6 25.50 0.014790
25  6 25.63 0.013470
26  6 25.74 0.012280
27  6 25.84 0.011190
28  6 25.92 0.010190
29  6 25.98 0.009287
30  6 26.04 0.008462
31  6 26.09 0.007710
32  6 26.13 0.007025
33  6 26.16 0.006401
34  6 26.19 0.005833
35  6 26.22 0.005315
36  6 26.24 0.004842
37  6 26.25 0.004412
38  6 26.27 0.004020
39  6 26.28 0.003663
40  6 26.29 0.003338
41  6 26.30 0.003041
42  6 26.31 0.002771
43  6 26.31 0.002525
44  6 26.32 0.002301
45  6 26.32 0.002096
46  6 26.32 0.001910
47  6 26.33 0.001740
48  6 26.33 0.001586
49  6 26.33 0.001445
50  6 26.33 0.001316
51  6 26.33 0.001199
52  6 26.34 0.001093
53  6 26.34 0.000996
Show code
# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs
# A tibble: 7 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)            -2.01  0.00316
2 disease_cat_Other      -0.271 0.00316
3 anc_pre_ld_log10       -0.546 0.00316
4 plt_pre_ld_log10       -0.205 0.00316
5 ldh_pre_ld_log10        0.434 0.00316
6 ferritin_day_3_log10    0.650 0.00316
7 d_dimer_day_3_log10     0.161 0.00316
Show code
# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()
Show code
# Evaluate final model on test set
final_fit %>% collect_metrics()
# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.885  Preprocessor1_Model1
2 roc_auc     binary        0.877  Preprocessor1_Model1
3 brier_class binary        0.0853 Preprocessor1_Model1
Show code
# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )
Show code
# Using {runway}
# Augment df_test (i.e., add columns for predictions to df_train)
df_mod_post_d_dimer <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_post_d_dimer <- cal_plot(
  df_mod_post_d_dimer,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPost (+ day +3 D-dimer)"
)
p_cal_post_d_dimer
Show code
# Plot ROC curve for test set
p_roc_post_d_dimer <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPost (+ day +3 D-dimer)")
p_roc_post_d_dimer

Calculate sensitivity/specificity of final model on test dataset using an optimal cutpoint based on the Youden index

Show code
cp <- cutpointr::cutpointr(
  data = df_mod_post_d_dimer,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)
Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8772 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.2171 0.6176 0.8558      0.7419      0.8757 23  8 22 155

Predictor summary: 
    Data        Min.         5%    1st Qu.     Median      Mean
 Overall 0.007766116 0.01750785 0.03889264 0.08848150 0.1573556
       0 0.007766116 0.01738958 0.03607295 0.07234603 0.1120083
       1 0.044616841 0.10484003 0.18758762 0.30055495 0.4162737
   3rd Qu.       95%      Max.        SD NAs
 0.1836877 0.5839416 0.9969261 0.1891879   0
 0.1511257 0.3616618 0.6807138 0.1143803   0
 0.6312323 0.9626119 0.9969261 0.2980046   0

Decision curve analysis of final model fitted on test dataset

Show code
df_mod_post_d_dimer <- df_mod_post_d_dimer %>% 
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(icaht_grade_3_4 ~ .pred_1, data = df_mod_post_d_dimer,
    thresholds = seq(0, 0.35, by = 0.01)) %>% 
  plot(smooth = TRUE)

Decision curve analyses of early ICAHT prediction models (eIPM)

eIPMPre vs. eIPMPost

Show code
df_mod_all <- df_mod_pre_ferritin_pre_ld %>%
  rename(predictions_pre_ferritin_pre_ld = .pred_1) %>%
  left_join(
    .,
    df_mod_pre_ferritin_pre_ld_LD_regimen %>% select(upn, predictions_pre_ferritin_pre_ld_LD_regimen = .pred_1),
    by = "upn"
  ) %>%
  left_join(.,
            df_mod_post_crp %>% select(upn, predictions_post_crp = .pred_1),
            by = "upn") %>%
  left_join(
    .,
    df_mod_post_d_dimer %>% select(upn, predictions_post_d_dimer = .pred_1),
    by = "upn"
  ) %>%
  left_join(
    .,
    df_mod_post_ferritin_only %>% select(upn, predictions_post_ferritin_only = .pred_1),
    by = "upn"
  ) %>%
  select(
    upn,
    icaht_grade_3_4,
    predictions_pre_ferritin_pre_ld,
    predictions_pre_ferritin_pre_ld_LD_regimen,
    predictions_post_ferritin_only,
    predictions_post_crp,
    predictions_post_d_dimer
  )

dcurves::dca(
  icaht_grade_3_4 ~ predictions_pre_ferritin_pre_ld + predictions_post_ferritin_only,
  data = df_mod_all,
  thresholds = seq(0, 0.35, by = 0.01),
  label = list(
    predictions_pre_ferritin_pre_ld = "eIPMPre",
    predictions_post_ferritin_only = "eIPMPost"
  )
) %>%
  plot(smooth = TRUE)

eIPMPre with pre-LD ferritin vs. eIPMPre (+ LD regimen) vs. all eIPMPosts

Show code
dcurves::dca(
  icaht_grade_3_4 ~ predictions_pre_ferritin_pre_ld + predictions_pre_ferritin_pre_ld_LD_regimen +
    predictions_post_ferritin_only + predictions_post_crp + predictions_post_d_dimer,
  data = df_mod_all,
  thresholds = seq(0, 0.35, by = 0.01),
  label = list(
    predictions_pre_ferritin_pre_ld = "eIPMPre",
    predictions_pre_ferritin_pre_ld_LD_regimen = "eIPMPre (+ LD regimen)",
    predictions_post_ferritin_only = "eIPMPost",
    predictions_post_crp ="eIPMPost (+ day +3 CRP)",
    predictions_post_d_dimer ="eIPMPost (+ day +3 D-dimer)"
  )
) %>%
  plot(smooth = TRUE)

Patchwork layout of ROC curves of eIPMPre with pre-LD ferritin, eIPMPre (+ LD regimen), and all eIPMPosts

Show code
p <- p_roc_pre_ferritin_pre_ld + p_roc_pre_ferritin_pre_ld_LD_regimen + p_roc_post_ferritin_only +
  p_roc_post_crp + p_roc_post_d_dimer
p + plot_layout(ncol = 2) + plot_annotation(tag_levels = "A")

Characteristics of training and test cohorts

Training cohort

Show code
df %>% 
  filter(upn %in% df_train$upn) %>% 
  select(
    cart_age,
    gender_desc,
    racenih,
    ethnih,
    prior_hct,
    disease_cat,
    anc_pre_ld,
    hb_pre_ld, 
    plt_pre_ld,
    LD_regimen_cat,
    product_infused,
    product_cat,
    costim_domain
  ) %>%
  tbl_summary(
    missing_text = "Missing",
    type = list(all_continuous() ~ "continuous2"),
    statistic = list(
      all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"),
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_categorical() ~ 0,
    sort = list(everything() ~ "frequency")
  ) %>%
  bold_labels()
Characteristic N = 4831
Age
    Median (IQR) 62 (50, 68)
    Range 21 - 84
Sex
    Male 306 (63%)
    Female 177 (37%)
Race
    White 413 (86%)
    Asian 35 (7%)
    Black or African American 13 (3%)
    American Indian or Alaska Native 11 (2%)
    Multiple 6 (1%)
    Unknown 3 (1%)
    Native Hawaiian or other Pacific Islander 2 (0%)
Ethnicity
    Not Hispanic/Latino or Unknown 446 (92%)
    Hispanic or Latino 37 (8%)
Prior HCT 161 (33%)
Disease
    Aggressive NHL 216 (45%)
    Indolent NHL 110 (23%)
    MM/PCL 79 (16%)
    ALL 78 (16%)
Pre-LD ANC
    Median (IQR) 2.86 (1.84, 4.22)
    Range 0.00 - 23.17
    Missing 1
Pre-LD Hb
    Median (IQR) 10.70 (9.50, 12.25)
    Range 7.30 - 17.10
Pre-LD platelet count
    Median (IQR) 146 (94, 203)
    Range 6 - 790
Lymphodepletion regimen
    Low-intensity Cy/Flu 284 (59%)
    High-intensity Cy/Flu 175 (36%)
    Other 24 (5%)
CAR T-cell product
    JCAR014 134 (28%)
    YESCARTA 94 (19%)
    BREYANZI 64 (13%)
    CARVYKTI 35 (7%)
    Investigational CD20 product 34 (7%)
    JCAR021 33 (7%)
    TECARTUS 32 (7%)
    ABECMA 22 (5%)
    Investigational BCMA product 22 (5%)
    KYMRIAH 13 (3%)
CAR T-cell product category
    Investigational CAR T-cell product 223 (46%)
    Commercial CD19 CAR T-cell product with CD28 costimulatory domain (axi-cel, brexu-cel) 126 (26%)
    Commercial CD19 CAR T-cell product with 4-1BB costimulatory domain (liso-cel, tisa-cel) 77 (16%)
    Commercial BCMA CAR T-cell product (cilta-cel, ide-cel) 57 (12%)
Costimulatory domain
    4-1BB 323 (67%)
    CD28 126 (26%)
    CD28-41BB 34 (7%)
1 n (%)

Test cohort

Show code
df %>% 
  filter(upn %in% df_test$upn) %>% 
  select(
    cart_age,
    gender_desc,
    racenih,
    ethnih,
    prior_hct,
    disease_cat,
    anc_pre_ld,
    hb_pre_ld, 
    plt_pre_ld,
    LD_regimen_cat,
    product_infused,
    product_cat,
    costim_domain
  ) %>%
  tbl_summary(
    missing_text = "Missing",
    type = list(all_continuous() ~ "continuous2"),
    statistic = list(
      all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"),
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_categorical() ~ 0,
    sort = list(everything() ~ "frequency")
  ) %>%
  bold_labels()
Characteristic N = 2081
Age
    Median (IQR) 61 (53, 67)
    Range 19 - 79
Sex
    Male 131 (63%)
    Female 77 (37%)
Race
    White 170 (82%)
    Asian 23 (11%)
    Black or African American 5 (2%)
    Unknown 4 (2%)
    American Indian or Alaska Native 2 (1%)
    Multiple 2 (1%)
    Native Hawaiian or other Pacific Islander 2 (1%)
Ethnicity
    Not Hispanic/Latino or Unknown 193 (93%)
    Hispanic or Latino 15 (7%)
Prior HCT 73 (35%)
Disease
    Aggressive NHL 102 (49%)
    Indolent NHL 44 (21%)
    MM/PCL 41 (20%)
    ALL 21 (10%)
Pre-LD ANC
    Median (IQR) 2.77 (1.58, 4.60)
    Range 0.02 - 54.03
Pre-LD Hb
    Median (IQR) 10.70 (9.60, 12.42)
    Range 7.00 - 15.60
Pre-LD platelet count
    Median (IQR) 139 (82, 201)
    Range 7 - 453
Lymphodepletion regimen
    Low-intensity Cy/Flu 119 (57%)
    High-intensity Cy/Flu 78 (38%)
    Other 11 (5%)
CAR T-cell product
    JCAR014 59 (28%)
    YESCARTA 46 (22%)
    BREYANZI 23 (11%)
    CARVYKTI 17 (8%)
    Investigational BCMA product 16 (8%)
    TECARTUS 15 (7%)
    Investigational CD20 product 11 (5%)
    JCAR021 11 (5%)
    ABECMA 8 (4%)
    KYMRIAH 2 (1%)
CAR T-cell product category
    Investigational CAR T-cell product 97 (47%)
    Commercial CD19 CAR T-cell product with CD28 costimulatory domain (axi-cel, brexu-cel) 61 (29%)
    Commercial BCMA CAR T-cell product (cilta-cel, ide-cel) 25 (12%)
    Commercial CD19 CAR T-cell product with 4-1BB costimulatory domain (liso-cel, tisa-cel) 25 (12%)
Costimulatory domain
    4-1BB 136 (65%)
    CD28 61 (29%)
    CD28-41BB 11 (5%)
1 n (%)

5th and 95th percentiles of numeric variables in eIPMs

Show code
df %>% 
  filter(upn %in% df_train$upn) %>% 
  plyr::summarize(
    anc_pre_ld_5p = quantile(anc_pre_ld, 0.05, na.rm = TRUE),
    plt_pre_ld_5p = quantile(plt_pre_ld, 0.05, na.rm = TRUE),
    ldh_pre_ld_5p = quantile(ldh_pre_ld, 0.05, na.rm = TRUE),
    ferritin_pre_ld_5p = quantile(ferritin_pre_ld, 0.05, na.rm = TRUE),
    ferritin_day_3_5p = quantile(ferritin_day_3, 0.05, na.rm = TRUE)
  )
  anc_pre_ld_5p plt_pre_ld_5p ldh_pre_ld_5p ferritin_pre_ld_5p
1          0.71          24.1        114.35               27.1
  ferritin_day_3_5p
1                68
Show code
df %>% 
  filter(upn %in% df_train$upn) %>% 
  plyr::summarize(
    anc_pre_ld_95p = quantile(anc_pre_ld, 0.95, na.rm = TRUE),
    plt_pre_ld_95p = quantile(plt_pre_ld, 0.95, na.rm = TRUE),
    ldh_pre_ld_95p = quantile(ldh_pre_ld, 0.95, na.rm = TRUE),
    ferritin_pre_ld_95p = quantile(ferritin_pre_ld, 0.95, na.rm = TRUE),
    ferritin_day_3_95p = quantile(ferritin_day_3, 0.95, na.rm = TRUE)
  )
  anc_pre_ld_95p plt_pre_ld_95p ldh_pre_ld_95p ferritin_pre_ld_95p
1         7.9815          287.4         644.75              3904.1
  ferritin_day_3_95p
1            4931.45

Computational environment

Show code
devtools::session_info() 
─ Session info ─────────────────────────────────────────────────────
 setting  value
 version  R version 4.3.3 (2024-02-29)
 os       macOS Sonoma 14.5
 system   aarch64, darwin20
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Los_Angeles
 date     2024-07-02
 pandoc   3.1.11 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)

─ Packages ─────────────────────────────────────────────────────────
 package       * version    date (UTC) lib source
 backports       1.4.1      2021-12-13 [1] CRAN (R 4.3.0)
 base64enc       0.1-3      2015-07-28 [1] CRAN (R 4.3.0)
 broom         * 1.0.5      2023-06-09 [1] CRAN (R 4.3.0)
 broom.helpers   1.14.0     2023-08-07 [1] CRAN (R 4.3.0)
 bslib           0.7.0      2024-03-29 [1] CRAN (R 4.3.1)
 cachem          1.0.8      2023-05-01 [1] CRAN (R 4.3.0)
 caret         * 6.0-94     2023-03-21 [1] CRAN (R 4.3.0)
 checkmate       2.3.1      2023-12-04 [1] CRAN (R 4.3.1)
 class           7.3-22     2023-05-03 [1] CRAN (R 4.3.3)
 cli             3.6.2      2023-12-11 [1] CRAN (R 4.3.1)
 cluster         2.1.6      2023-12-01 [1] CRAN (R 4.3.3)
 codetools       0.2-19     2023-02-01 [1] CRAN (R 4.3.3)
 colorspace      2.1-0      2023-01-23 [1] CRAN (R 4.3.0)
 commonmark      1.9.1      2024-01-30 [1] CRAN (R 4.3.1)
 crosstalk       1.2.1      2023-11-23 [1] CRAN (R 4.3.1)
 cutpointr     * 1.1.2      2022-04-13 [1] CRAN (R 4.3.0)
 data.table      1.15.4     2024-03-30 [1] CRAN (R 4.3.1)
 dcurves       * 0.4.0      2022-12-23 [1] CRAN (R 4.3.0)
 devtools        2.4.5      2022-10-11 [1] CRAN (R 4.3.0)
 dials         * 1.2.1      2024-02-22 [1] CRAN (R 4.3.1)
 DiceDesign      1.10       2023-12-07 [1] CRAN (R 4.3.1)
 digest          0.6.35     2024-03-11 [1] CRAN (R 4.3.1)
 distill       * 1.6        2023-10-06 [1] CRAN (R 4.3.1)
 doParallel      1.0.17     2022-02-07 [1] CRAN (R 4.3.0)
 downlit         0.4.3      2023-06-29 [1] CRAN (R 4.3.0)
 dplyr         * 1.1.4      2023-11-17 [1] CRAN (R 4.3.1)
 e1071           1.7-14     2023-12-06 [1] CRAN (R 4.3.1)
 ellipsis        0.3.2      2021-04-29 [1] CRAN (R 4.3.0)
 evaluate        0.23       2023-11-01 [1] CRAN (R 4.3.1)
 fansi           1.0.6      2023-12-08 [1] CRAN (R 4.3.1)
 farver          2.1.1      2022-07-06 [1] CRAN (R 4.3.0)
 fastmap         1.1.1      2023-02-24 [1] CRAN (R 4.3.0)
 forcats       * 1.0.0      2023-01-29 [1] CRAN (R 4.3.0)
 foreach         1.5.2      2022-02-02 [1] CRAN (R 4.3.0)
 foreign         0.8-86     2023-11-28 [1] CRAN (R 4.3.3)
 Formula         1.2-5      2023-02-24 [1] CRAN (R 4.3.0)
 fs              1.6.4      2024-04-25 [1] CRAN (R 4.3.1)
 furrr           0.3.1      2022-08-15 [1] CRAN (R 4.3.0)
 future          1.33.2     2024-03-26 [1] CRAN (R 4.3.1)
 future.apply    1.11.2     2024-03-28 [1] CRAN (R 4.3.1)
 generics        0.1.3      2022-07-05 [1] CRAN (R 4.3.0)
 ggeasy          0.1.4      2023-03-12 [1] CRAN (R 4.3.0)
 ggplot2       * 3.5.0      2024-02-23 [1] CRAN (R 4.3.1)
 ggrepel       * 0.9.5      2024-01-10 [1] CRAN (R 4.3.1)
 ggsurvfit     * 1.0.0      2023-10-31 [1] CRAN (R 4.3.1)
 glmnet        * 4.1-8      2023-08-22 [1] CRAN (R 4.3.0)
 globals         0.16.3     2024-03-08 [1] CRAN (R 4.3.1)
 glue            1.7.0      2024-01-09 [1] CRAN (R 4.3.1)
 gower           1.0.1      2022-12-22 [1] CRAN (R 4.3.0)
 GPfit           1.0-8      2019-02-08 [1] CRAN (R 4.3.0)
 gridExtra       2.3        2017-09-09 [1] CRAN (R 4.3.0)
 gt              0.10.1     2024-01-17 [1] CRAN (R 4.3.1)
 gtable          0.3.4      2023-08-21 [1] CRAN (R 4.3.0)
 gtsummary     * 1.7.2      2023-07-15 [1] CRAN (R 4.3.0)
 hardhat         1.3.1      2024-02-02 [1] CRAN (R 4.3.1)
 haven           2.5.4      2023-11-30 [1] CRAN (R 4.3.1)
 heatwaveR       0.4.6      2021-10-27 [1] CRAN (R 4.3.0)
 here          * 1.0.1      2020-12-13 [1] CRAN (R 4.3.0)
 highr           0.10       2022-12-22 [1] CRAN (R 4.3.0)
 Hmisc         * 5.1-2      2024-03-11 [1] CRAN (R 4.3.1)
 hms             1.1.3      2023-03-21 [1] CRAN (R 4.3.0)
 htmlTable       2.4.2      2023-10-29 [1] CRAN (R 4.3.1)
 htmltools       0.5.8.1    2024-04-04 [1] CRAN (R 4.3.1)
 htmlwidgets     1.6.4      2023-12-06 [1] CRAN (R 4.3.1)
 httpuv          1.6.15     2024-03-26 [1] CRAN (R 4.3.1)
 httr            1.4.7      2023-08-15 [1] CRAN (R 4.3.0)
 infer         * 1.0.7      2024-03-25 [1] CRAN (R 4.3.1)
 ipred           0.9-14     2023-03-09 [1] CRAN (R 4.3.0)
 iterators       1.0.14     2022-02-05 [1] CRAN (R 4.3.0)
 janitor       * 2.2.0      2023-02-02 [1] CRAN (R 4.3.0)
 jquerylib       0.1.4      2021-04-26 [1] CRAN (R 4.3.0)
 jsonlite        1.8.8      2023-12-04 [1] CRAN (R 4.3.1)
 knitr         * 1.45       2023-10-30 [1] CRAN (R 4.3.1)
 labeling        0.4.3      2023-08-29 [1] CRAN (R 4.3.0)
 labelled      * 2.12.0     2023-06-21 [1] CRAN (R 4.3.0)
 later           1.3.2      2023-12-06 [1] CRAN (R 4.3.1)
 lattice       * 0.22-5     2023-10-24 [1] CRAN (R 4.3.3)
 lava            1.8.0      2024-03-05 [1] CRAN (R 4.3.1)
 lazyeval        0.2.2      2019-03-15 [1] CRAN (R 4.3.0)
 lhs             1.1.6      2022-12-17 [1] CRAN (R 4.3.0)
 lifecycle       1.0.4      2023-11-07 [1] CRAN (R 4.3.1)
 listenv         0.9.1      2024-01-29 [1] CRAN (R 4.3.1)
 lubridate     * 1.9.3      2023-09-27 [1] CRAN (R 4.3.1)
 magrittr        2.0.3      2022-03-30 [1] CRAN (R 4.3.0)
 markdown        1.12       2023-12-06 [1] CRAN (R 4.3.1)
 MASS            7.3-60.0.1 2024-01-13 [1] CRAN (R 4.3.3)
 Matrix        * 1.6-5      2024-01-11 [1] CRAN (R 4.3.3)
 MatrixModels    0.5-3      2023-11-06 [1] CRAN (R 4.3.1)
 memoise         2.0.1      2021-11-26 [1] CRAN (R 4.3.0)
 mgcv            1.9-1      2023-12-21 [1] CRAN (R 4.3.3)
 mime            0.12       2021-09-28 [1] CRAN (R 4.3.0)
 miniUI          0.1.1.1    2018-05-18 [1] CRAN (R 4.3.0)
 modeldata     * 1.3.0      2024-01-21 [1] CRAN (R 4.3.1)
 ModelMetrics    1.2.2.2    2020-03-17 [1] CRAN (R 4.3.0)
 multcomp        1.4-25     2023-06-20 [1] CRAN (R 4.3.0)
 munsell         0.5.0      2018-06-12 [1] CRAN (R 4.3.0)
 mvtnorm         1.2-4      2023-11-27 [1] CRAN (R 4.3.1)
 nlme            3.1-164    2023-11-27 [1] CRAN (R 4.3.3)
 nnet            7.3-19     2023-05-03 [1] CRAN (R 4.3.3)
 openxlsx      * 4.2.5.2    2023-02-06 [1] CRAN (R 4.3.0)
 parallelly      1.37.1     2024-02-29 [1] CRAN (R 4.3.1)
 parsnip       * 1.2.1      2024-03-22 [1] CRAN (R 4.3.1)
 patchwork     * 1.2.0      2024-01-08 [1] CRAN (R 4.3.1)
 pillar          1.9.0      2023-03-22 [1] CRAN (R 4.3.0)
 pkgbuild        1.4.4      2024-03-17 [1] CRAN (R 4.3.1)
 pkgconfig       2.0.3      2019-09-22 [1] CRAN (R 4.3.0)
 pkgload         1.3.4      2024-01-16 [1] CRAN (R 4.3.1)
 plotly        * 4.10.4     2024-01-13 [1] CRAN (R 4.3.1)
 plyr            1.8.9      2023-10-02 [1] CRAN (R 4.3.1)
 pmsampsize    * 1.1.3      2023-12-06 [1] CRAN (R 4.3.1)
 polspline       1.1.24     2023-10-26 [1] CRAN (R 4.3.1)
 probably      * 1.0.3      2024-02-23 [1] CRAN (R 4.3.1)
 pROC            1.18.5     2023-11-01 [1] CRAN (R 4.3.1)
 prodlim       * 2023.08.28 2023-08-28 [1] CRAN (R 4.3.0)
 profvis         0.3.8      2023-05-02 [1] CRAN (R 4.3.0)
 promises        1.2.1      2023-08-10 [1] CRAN (R 4.3.0)
 proxy           0.4-27     2022-06-09 [1] CRAN (R 4.3.0)
 purrr         * 1.0.2      2023-08-10 [1] CRAN (R 4.3.0)
 quantreg        5.97       2023-08-19 [1] CRAN (R 4.3.0)
 R6              2.5.1      2021-08-19 [1] CRAN (R 4.3.0)
 Rcpp            1.0.12     2024-01-09 [1] CRAN (R 4.3.1)
 readr         * 2.1.5      2024-01-10 [1] CRAN (R 4.3.1)
 recipes       * 1.0.10     2024-02-18 [1] CRAN (R 4.3.1)
 remotes         2.5.0      2024-03-17 [1] CRAN (R 4.3.1)
 reshape2        1.4.4      2020-04-09 [1] CRAN (R 4.3.0)
 rlang           1.1.3      2024-01-10 [1] CRAN (R 4.3.1)
 rmarkdown     * 2.26       2024-03-05 [1] CRAN (R 4.3.1)
 rms           * 6.8-0      2024-03-11 [1] CRAN (R 4.3.1)
 rpart           4.1.23     2023-12-05 [1] CRAN (R 4.3.3)
 rprojroot       2.0.4      2023-11-05 [1] CRAN (R 4.3.1)
 rsample       * 1.2.1      2024-03-25 [1] CRAN (R 4.3.1)
 rstudioapi      0.16.0     2024-03-24 [1] CRAN (R 4.3.1)
 runway        * 0.1.0      2024-04-02 [1] Github (ML4LHS/runway@e93d55e)
 sandwich        3.1-0      2023-12-11 [1] CRAN (R 4.3.1)
 sass            0.4.9      2024-03-15 [1] CRAN (R 4.3.1)
 scales        * 1.3.0      2023-11-28 [1] CRAN (R 4.3.1)
 sessioninfo     1.2.2      2021-12-06 [1] CRAN (R 4.3.0)
 shape           1.4.6.1    2024-02-23 [1] CRAN (R 4.3.1)
 shiny           1.8.1.1    2024-04-02 [1] CRAN (R 4.3.1)
 snakecase       0.11.1     2023-08-27 [1] CRAN (R 4.3.0)
 SparseM         1.81       2021-02-18 [1] CRAN (R 4.3.0)
 stringi         1.8.3      2023-12-11 [1] CRAN (R 4.3.1)
 stringr       * 1.5.1      2023-11-14 [1] CRAN (R 4.3.1)
 survival        3.5-8      2024-02-14 [1] CRAN (R 4.3.3)
 TH.data         1.1-2      2023-04-17 [1] CRAN (R 4.3.0)
 tibble        * 3.2.1      2023-03-20 [1] CRAN (R 4.3.0)
 tidymodels    * 1.2.0      2024-03-25 [1] CRAN (R 4.3.1)
 tidyr         * 1.3.1      2024-01-24 [1] CRAN (R 4.3.1)
 tidyselect      1.2.1      2024-03-11 [1] CRAN (R 4.3.1)
 tidyverse     * 2.0.0      2023-02-22 [1] CRAN (R 4.3.0)
 timechange      0.3.0      2024-01-18 [1] CRAN (R 4.3.1)
 timeDate        4032.109   2023-12-14 [1] CRAN (R 4.3.1)
 tune          * 1.2.0      2024-03-20 [1] CRAN (R 4.3.1)
 tzdb            0.4.0      2023-05-12 [1] CRAN (R 4.3.0)
 urlchecker      1.0.1      2021-11-30 [1] CRAN (R 4.3.0)
 usethis         2.2.3      2024-02-19 [1] CRAN (R 4.3.1)
 utf8            1.2.4      2023-10-22 [1] CRAN (R 4.3.1)
 vctrs           0.6.5      2023-12-01 [1] CRAN (R 4.3.1)
 vip           * 0.4.1      2023-08-21 [1] CRAN (R 4.3.0)
 viridisLite     0.4.2      2023-05-02 [1] CRAN (R 4.3.0)
 withr           3.0.0      2024-01-16 [1] CRAN (R 4.3.1)
 workflows     * 1.1.4      2024-02-19 [1] CRAN (R 4.3.1)
 workflowsets  * 1.1.0      2024-03-21 [1] CRAN (R 4.3.1)
 xfun            0.43       2024-03-25 [1] CRAN (R 4.3.1)
 xml2            1.3.6      2023-12-04 [1] CRAN (R 4.3.1)
 xtable          1.8-4      2019-04-21 [1] CRAN (R 4.3.0)
 yaml            2.3.8      2023-12-11 [1] CRAN (R 4.3.1)
 yardstick     * 1.3.1      2024-03-21 [1] CRAN (R 4.3.1)
 zip             2.3.1      2024-01-27 [1] CRAN (R 4.3.1)
 zoo           * 1.8-12     2023-04-13 [1] CRAN (R 4.3.0)

 [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library

────────────────────────────────────────────────────────────────────